Skip to content

Commit 3d33a98

Browse files
authored
Merge branch 'opensearch-project:main' into main
2 parents 6332d7a + 66bcde7 commit 3d33a98

File tree

2 files changed

+129
-13
lines changed

2 files changed

+129
-13
lines changed

docs/model_serving_framework/deploy_sparse_model_to_SageMaker.ipynb

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,13 @@
5252
"outputs": [],
5353
"source": [
5454
"%%writefile handler/code/requirements.txt\n",
55-
"sentence-transformers==5.0.0"
55+
"transformers==4.56.1\n",
56+
"huggingface_hub==0.35.0\n",
57+
"hf_xet==1.1.10\n",
58+
"tokenizers==0.22.0\n",
59+
"regex==2025.9.1\n",
60+
"safetensors==0.6.2\n",
61+
"sentence-transformers==5.1.0"
5662
]
5763
},
5864
{
@@ -134,30 +140,64 @@
134140
" )\n",
135141
" print(f\"Using device: {self.device}\")\n",
136142
" self.model = SparseEncoder(model_id, device=self.device, trust_remote_code=trust_remote_code)\n",
143+
" self._warmup()\n",
137144
" self.initialized = True\n",
138145
"\n",
139-
" def preprocess(self, requests):\n",
146+
" def _warmup(self):\n",
147+
" input_data = [{\"body\": [\"hello world\"] * 10}]\n",
148+
" self.handle(input_data, None)\n",
149+
"\n",
150+
" def _preprocess(self, requests):\n",
140151
" inputSentence = []\n",
141152
" batch_idx = []\n",
153+
" formats = [] # per-text format: \"word\" or \"token_id\"\n",
142154
"\n",
143155
" for request in requests:\n",
144156
" request_body = request.get(\"body\")\n",
145157
" if isinstance(request_body, bytearray):\n",
146158
" request_body = request_body.decode(\"utf-8\")\n",
147159
" request_body = json.loads((request_body))\n",
148-
" if isinstance(request_body, list):\n",
160+
"\n",
161+
" # dict-based new schema: {\"texts\": str | list[str], \"sparse_embedding_format\": str}\n",
162+
" if isinstance(request_body, dict):\n",
163+
" texts = request_body.get(\"texts\")\n",
164+
" fmt = request_body.get(\"sparse_embedding_format\", \"word\")\n",
165+
" fmt = \"token_id\" if isinstance(fmt, str) and fmt.lower() == \"token_id\" else \"word\"\n",
166+
"\n",
167+
" if isinstance(texts, list):\n",
168+
" inputSentence += texts\n",
169+
" batch_idx.append(len(texts))\n",
170+
" formats += [fmt] * len(texts)\n",
171+
" else:\n",
172+
" inputSentence.append(texts)\n",
173+
" batch_idx.append(1)\n",
174+
" formats.append(fmt)\n",
175+
"\n",
176+
" # legacy schemas\n",
177+
" elif isinstance(request_body, list):\n",
149178
" inputSentence += request_body\n",
150179
" batch_idx.append(len(request_body))\n",
180+
" formats += [\"word\"] * len(request_body)\n",
151181
" else:\n",
152182
" inputSentence.append(request_body)\n",
153183
" batch_idx.append(1)\n",
184+
" formats.append(\"word\")\n",
185+
"\n",
186+
" return inputSentence, batch_idx, formats\n",
154187
"\n",
155-
" return inputSentence, batch_idx\n",
188+
" def _convert_token_ids(self, sparse_embedding):\n",
189+
" token_ids = self.model.tokenizer.convert_tokens_to_ids([x[0] for x in sparse_embedding])\n",
190+
" return [(str(token_ids[i]), sparse_embedding[i][1]) for i in range(len(token_ids))]\n",
156191
"\n",
157192
" def handle(self, data, context):\n",
158-
" inputSentence, batch_idx = self.preprocess(data)\n",
193+
" inputSentence, batch_idx, formats = self._preprocess(data)\n",
159194
" model_output = self.model.encode_document(inputSentence, batch_size=max_bs)\n",
160-
" sparse_embedding = list(map(dict,self.model.decode(model_output)))\n",
195+
"\n",
196+
" sparse_embedding_word = self.model.decode(model_output)\n",
197+
" for i, fmt in enumerate(formats):\n",
198+
" if fmt == \"token_id\":\n",
199+
" sparse_embedding_word[i] = self._convert_token_ids(sparse_embedding_word[i])\n",
200+
" sparse_embedding = list(map(dict, sparse_embedding_word))\n",
161201
"\n",
162202
" outputs = [sparse_embedding[s:e]\n",
163203
" for s, e in zip([0]+list(itertools.accumulate(batch_idx))[:-1],\n",
@@ -424,8 +464,8 @@
424464
"```json\n",
425465
"POST /_plugins/_ml/connectors/_create\n",
426466
"{\n",
427-
" \"name\": \"test\",\n",
428-
" \"description\": \"Test connector for Sagemaker model\",\n",
467+
" \"name\": \"Sagemaker Connector: embedding\",\n",
468+
" \"description\": \"The connector to sagemaker embedding model\",\n",
429469
" \"version\": 1,\n",
430470
" \"protocol\": \"aws_sigv4\",\n",
431471
" \"credential\": {\n",
@@ -436,6 +476,7 @@
436476
" \"region\": \"{region}\",\n",
437477
" \"service_name\": \"sagemaker\",\n",
438478
" \"input_docs_processed_step_size\": 2,\n",
479+
" \"sparse_embedding_format\": \"word\"\n",
439480
" },\n",
440481
" \"actions\": [\n",
441482
" {\n",
@@ -445,7 +486,12 @@
445486
" \"content-type\": \"application/json\"\n",
446487
" },\n",
447488
" \"url\": \"https://runtime.sagemaker.{region}.amazonaws.com/endpoints/{predictor.endpoint_name}/invocations\",\n",
448-
" \"request_body\": \"${parameters.input}\"\n",
489+
" \"request_body\": \"\"\"\n",
490+
" {\n",
491+
" \"texts\": ${parameters.input},\n",
492+
" \"sparse_embedding_format\": \"${parameters.sparse_embedding_format}\"\n",
493+
" }\n",
494+
" \"\"\"\n",
449495
" }\n",
450496
" ],\n",
451497
" \"client_config\":{\n",

docs/remote_inference_blueprints/standard_blueprints/sagemaker_semantic_highlighter_standard_blueprint.md

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# AWS SageMaker Semantic Highlighter Model Standard Blueprint
22

3-
This blueprint demonstrates how to deploy a semantic highlighter model using AWS SageMaker and integrate it with OpenSearch. For a detailed Python-based tutorial on deploying the model to SageMaker, please refer to the [Deploying OpenSearch Sentence Highlighter Model To AWS SageMaker Guide](https://github.com/opensearch-project/opensearch-py-ml/blob/main/docs/source/examples/aws_sagemaker_sentence_highlighter_model/README.md).
3+
This blueprint demonstrates how to deploy a semantic highlighter model using AWS SageMaker and integrate it with OpenSearch. For a detailed Python-based tutorial on deploying the model to SageMaker, please refer to the [Deploying OpenSearch Sentence Highlighter Model To AWS SageMaker Guide](https://github.com/opensearch-project/opensearch-py-ml/blob/main/docs/source/examples/semantic_highlighting/README.md).
44

55
## Overview
66

@@ -11,6 +11,8 @@ The semantic highlighter model helps identify and highlight the most relevant pa
1111
3. Register and deploy the model
1212
4. Test the model inference
1313

14+
**Note:** Batch inference for semantic highlighting requires OpenSearch 3.3 or later. On OpenSearch 3.0-3.2, only single-document inference is supported.
15+
1416
## Prerequisites
1517

1618
1. AWS account with SageMaker access
@@ -19,6 +21,8 @@ The semantic highlighter model helps identify and highlight the most relevant pa
1921

2022
## Steps
2123

24+
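> **Note:** This connector supports both single-document inference (OpenSearch 3.0+) and batch inference (OpenSearch 3.3+). The unified pre-process function checks whether the request carries `inputs` (batch) or `question` and `context` (single document) and builds the matching payload, so the same connector stays backward compatible.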
25+
2226
### 1. Create SageMaker Connector
2327

2428
```json
@@ -47,8 +51,8 @@ POST /_plugins/_ml/connectors/_create
4751
"content-type": "application/json"
4852
},
4953
"url": "https://runtime.sagemaker.${parameters.region}.amazonaws.com/endpoints/${parameters.model}/invocations",
50-
"request_body": "{ \"question\": \"${parameters.question}\", \"context\": \"${parameters.context}\" }",
51-
"pre_process_function": "// Extract question and context directly from params\nif (params.question != null && params.context != null) {\n return '{\"parameters\":{\"question\":\"' + params.question + '\",\"context\":\"' + params.context + '\"}}'; \n} \nelse {\n throw new IllegalArgumentException(\"Missing required parameters: question and context\");\n}"
54+
"request_body": "{ \"question\": \"${parameters.question:-}\", \"context\": \"${parameters.context:-}\", \"inputs\": ${parameters.inputs:-[]} }",
55+
"pre_process_function": "// Unified pre-process function for backward compatibility\nif (params.question != null && params.context != null && params.inputs == null) {\n // Single document format from older versions\n return '{\"parameters\":{\"question\":\"' + params.question + '\",\"context\":\"' + params.context + '\"}}';\n}\nelse if (params.inputs != null) {\n // Batch format from newer versions - pass inputs as JSON string\n String inputsJson = params.inputs.toString();\n return '{\"parameters\":{\"inputs\":' + inputsJson + '}}';\n}\nelse {\n throw new IllegalArgumentException(\"Invalid input format: must provide either (question and context) or (inputs)\");\n}"
5256
}
5357
]
5458
}
@@ -102,8 +106,36 @@ POST /_plugins/_ml/models/<MODEL_ID>/_predict
102106

103107
Replace `<MODEL_ID>` with your deployed model ID.
104108

109+
### 5. Test Batch Inference (OpenSearch 3.3+)
110+
111+
```json
112+
POST /_plugins/_ml/models/<MODEL_ID>/_predict
113+
{
114+
"parameters": {
115+
"inputs": [
116+
{
117+
"question": "What are the symptoms of heart failure?",
118+
"context": "Heart failure symptoms include shortness of breath, swelling in the feet and ankles, fatigue, and irregular pulse. Patients may also experience difficulty sleeping flat in bed."
119+
},
120+
{
121+
"question": "What causes high blood pressure?",
122+
"context": "High blood pressure can be caused by various factors including genetics, poor diet, lack of exercise, and stress. Sodium intake and obesity are major contributors."
123+
},
124+
{
125+
"question": "How is diabetes managed?",
126+
"context": "Diabetes management involves monitoring blood sugar levels, maintaining a healthy diet, regular exercise, and medication when necessary. Insulin therapy may be required for some patients."
127+
}
128+
]
129+
}
130+
}
131+
```
132+
133+
Replace `<MODEL_ID>` with your deployed model ID.
134+
105135
## Example Response
106136

137+
### Single Document Response
138+
107139
```json
108140
{
109141
"inference_results": [
@@ -126,8 +158,46 @@ Replace `<MODEL_ID>` with your deployed model ID.
126158
}
127159
```
128160

161+
### Batch Inference Response
162+
163+
```json
164+
{
165+
"inference_results": [
166+
{
167+
"output": [
168+
{
169+
"highlights": [
170+
{
171+
"start": 0,
172+
"end": 145
173+
}
174+
]
175+
},
176+
{
177+
"highlights": [
178+
{
179+
"start": 62,
180+
"end": 134
181+
}
182+
]
183+
},
184+
{
185+
"highlights": [
186+
{
187+
"start": 0,
188+
"end": 108
189+
}
190+
]
191+
}
192+
],
193+
"status_code": 200
194+
}
195+
]
196+
}
197+
```
198+
129199
## References
130-
- [Deploying OpenSearch Sentence Highlighter Model To AWS SageMaker Guide](https://github.com/opensearch-project/opensearch-py-ml/docs/source/examples/aws_sagemaker_sentence_highlighter_model/README.md)
200+
- [Deploying OpenSearch Sentence Highlighter Model To AWS SageMaker Guide](https://github.com/opensearch-project/opensearch-py-ml/blob/main/docs/source/examples/semantic_highlighting/README.md)
131201
- [Using OpenSearch Semantic Highlighting Guide](https://docs.opensearch.org/docs/latest/tutorials/vector-search/semantic-highlighting-tutorial/)
132202
- [OpenSearch ML Commons Documentation](https://opensearch.org/docs/latest/ml-commons-plugin/remote-models/index/)
133203
- [SageMaker Endpoints Documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/deploy-model.html)

0 commit comments

Comments
 (0)