Merge branch 'opensearch-project:main' into main

owaiskazi19 · web-flow · commit 3d33a98273d7 · 2025-10-22T16:53:12.000-07:00
diff --git a/docs/model_serving_framework/deploy_sparse_model_to_SageMaker.ipynb b/docs/model_serving_framework/deploy_sparse_model_to_SageMaker.ipynb
@@ -52,7 +52,13 @@
    "outputs": [],
    "source": [
     "%%writefile handler/code/requirements.txt\n",
-    "sentence-transformers==5.0.0"
+    "transformers==4.56.1\n",
+    "huggingface_hub==0.35.0\n",
+    "hf_xet==1.1.10\n",
+    "tokenizers==0.22.0\n",
+    "regex==2025.9.1\n",
+    "safetensors==0.6.2\n",
+    "sentence-transformers==5.1.0"
    ]
   },
   {
@@ -134,30 +140,64 @@
     "        )\n",
     "        print(f\"Using device: {self.device}\")\n",
     "        self.model = SparseEncoder(model_id, device=self.device, trust_remote_code=trust_remote_code)\n",
+    "        self._warmup()\n",
     "        self.initialized = True\n",
     "\n",
-    "    def preprocess(self, requests):\n",
+    "    def _warmup(self):\n",
+    "        input_data = [{\"body\": [\"hello world\"] * 10}]\n",
+    "        self.handle(input_data, None)\n",
+    "\n",
+    "    def _preprocess(self, requests):\n",
     "        inputSentence = []\n",
     "        batch_idx = []\n",
+    "        formats = []  # per-text format: \"word\" or \"token_id\"\n",
     "\n",
     "        for request in requests:\n",
     "            request_body = request.get(\"body\")\n",
     "            if isinstance(request_body, bytearray):\n",
     "                request_body = request_body.decode(\"utf-8\")\n",
     "                request_body = json.loads((request_body))\n",
-    "            if isinstance(request_body, list):\n",
+    "\n",
+    "            # dict-based new schema: {\"texts\": str | list[str], \"sparse_embedding_format\": str}\n",
+    "            if isinstance(request_body, dict):\n",
+    "                texts = request_body.get(\"texts\")\n",
+    "                fmt = request_body.get(\"sparse_embedding_format\", \"word\")\n",
+    "                fmt = \"token_id\" if isinstance(fmt, str) and fmt.lower() == \"token_id\" else \"word\"\n",
+    "\n",
+    "                if isinstance(texts, list):\n",
+    "                    inputSentence += texts\n",
+    "                    batch_idx.append(len(texts))\n",
+    "                    formats += [fmt] * len(texts)\n",
+    "                else:\n",
+    "                    inputSentence.append(texts)\n",
+    "                    batch_idx.append(1)\n",
+    "                    formats.append(fmt)\n",
+    "\n",
+    "            # legacy schemas\n",
+    "            elif isinstance(request_body, list):\n",
     "                inputSentence += request_body\n",
     "                batch_idx.append(len(request_body))\n",
+    "                formats += [\"word\"] * len(request_body)\n",
     "            else:\n",
     "                inputSentence.append(request_body)\n",
     "                batch_idx.append(1)\n",
+    "                formats.append(\"word\")\n",
+    "\n",
+    "        return inputSentence, batch_idx, formats\n",
     "\n",
-    "        return inputSentence, batch_idx\n",
+    "    def _convert_token_ids(self, sparse_embedding):\n",
+    "        token_ids = self.model.tokenizer.convert_tokens_to_ids([x[0] for x in sparse_embedding])\n",
+    "        return [(str(token_ids[i]), sparse_embedding[i][1]) for i in range(len(token_ids))]\n",
     "\n",
     "    def handle(self, data, context):\n",
-    "        inputSentence, batch_idx = self.preprocess(data)\n",
+    "        inputSentence, batch_idx, formats = self._preprocess(data)\n",
     "        model_output = self.model.encode_document(inputSentence, batch_size=max_bs)\n",
-    "        sparse_embedding = list(map(dict,self.model.decode(model_output)))\n",
+    "\n",
+    "        sparse_embedding_word = self.model.decode(model_output)\n",
+    "        for i, fmt in enumerate(formats):\n",
+    "            if fmt == \"token_id\":\n",
+    "                sparse_embedding_word[i] = self._convert_token_ids(sparse_embedding_word[i])\n",
+    "        sparse_embedding = list(map(dict, sparse_embedding_word))\n",
     "\n",
     "        outputs = [sparse_embedding[s:e]\n",
     "           for s, e in zip([0]+list(itertools.accumulate(batch_idx))[:-1],\n",
@@ -424,8 +464,8 @@
     "```json\n",
     "POST /_plugins/_ml/connectors/_create\n",
     "{\n",
-    "  \"name\": \"test\",\n",
-    "  \"description\": \"Test connector for Sagemaker model\",\n",
+    "  \"name\": \"Sagemaker Connector: embedding\",\n",
+    "  \"description\": \"The connector to sagemaker embedding model\",\n",
     "  \"version\": 1,\n",
     "  \"protocol\": \"aws_sigv4\",\n",
     "  \"credential\": {\n",
@@ -436,6 +476,7 @@
     "    \"region\": \"{region}\",\n",
     "    \"service_name\": \"sagemaker\",\n",
     "    \"input_docs_processed_step_size\": 2,\n",
+    "    \"sparse_embedding_format\": \"word\"\n",
     "  },\n",
     "  \"actions\": [\n",
     "    {\n",
@@ -445,7 +486,12 @@
     "        \"content-type\": \"application/json\"\n",
     "      },\n",
     "      \"url\": \"https://runtime.sagemaker.{region}.amazonaws.com/endpoints/{predictor.endpoint_name}/invocations\",\n",
-    "      \"request_body\": \"${parameters.input}\"\n",
+    "      \"request_body\": \"\"\"\n",
+    "          {\n",
+    "              \"texts\": ${parameters.input},\n",
+    "              \"sparse_embedding_format\": \"${parameters.sparse_embedding_format}\"\n",
+    "          }\n",
+    "      \"\"\"\n",
     "    }\n",
     "  ],\n",
     "  \"client_config\":{\n",
diff --git a/docs/remote_inference_blueprints/standard_blueprints/sagemaker_semantic_highlighter_standard_blueprint.md b/docs/remote_inference_blueprints/standard_blueprints/sagemaker_semantic_highlighter_standard_blueprint.md
@@ -1,6 +1,6 @@
 # AWS SageMaker Semantic Highlighter Model Standard Blueprint
 
-This blueprint demonstrates how to deploy a semantic highlighter model using AWS SageMaker and integrate it with OpenSearch. For a detailed Python-based tutorial on deploying the model to SageMaker, please refer to the [Deploying OpenSearch Sentence Highlighter Model To AWS SageMaker Guide](https://github.com/opensearch-project/opensearch-py-ml/blob/main/docs/source/examples/aws_sagemaker_sentence_highlighter_model/README.md).
+This blueprint demonstrates how to deploy a semantic highlighter model using AWS SageMaker and integrate it with OpenSearch. For a detailed Python-based tutorial on deploying the model to SageMaker, please refer to the [Deploying OpenSearch Sentence Highlighter Model To AWS SageMaker Guide](https://github.com/opensearch-project/opensearch-py-ml/blob/main/docs/source/examples/semantic_highlighting/README.md).
 
 ## Overview
 
@@ -11,6 +11,8 @@ The semantic highlighter model helps identify and highlight the most relevant pa
 3. Register and deploy the model
 4. Test the model inference
 
+**Note:** Batch inference semantic highlighting support requires OpenSearch 3.3 or later. For OpenSearch 3.0-3.2, only single document inference is supported.
+
 ## Prerequisites
 
 1. AWS account with SageMaker access
@@ -19,6 +21,8 @@ The semantic highlighter model helps identify and highlight the most relevant pa
 
 ## Steps
 
+> **Note:** This connector supports both single document inference (OpenSearch 3.0+) and batch inference (OpenSearch 3.3+). The unified pre-process function automatically handles both formats for backward compatibility.
+
 ### 1. Create SageMaker Connector
 
 ```json
@@ -47,8 +51,8 @@ POST /_plugins/_ml/connectors/_create
         "content-type": "application/json"
       },
       "url": "https://runtime.sagemaker.${parameters.region}.amazonaws.com/endpoints/${parameters.model}/invocations",
-      "request_body": "{ \"question\": \"${parameters.question}\", \"context\": \"${parameters.context}\" }",
-      "pre_process_function": "// Extract question and context directly from params\nif (params.question != null && params.context != null) {\n    return '{\"parameters\":{\"question\":\"' + params.question + '\",\"context\":\"' + params.context + '\"}}'; \n} \nelse {\n    throw new IllegalArgumentException(\"Missing required parameters: question and context\");\n}"
+      "request_body": "{ \"question\": \"${parameters.question:-}\", \"context\": \"${parameters.context:-}\", \"inputs\": ${parameters.inputs:-[]} }",
+      "pre_process_function": "// Unified pre-process function for backward compatibility\nif (params.question != null && params.context != null && params.inputs == null) {\n  // Single document format from older versions\n  return '{\"parameters\":{\"question\":\"' + params.question + '\",\"context\":\"' + params.context + '\"}}';\n}\nelse if (params.inputs != null) {\n  // Batch format from newer versions - pass inputs as JSON string\n  String inputsJson = params.inputs.toString();\n  return '{\"parameters\":{\"inputs\":' + inputsJson + '}}';\n}\nelse {\n  throw new IllegalArgumentException(\"Invalid input format: must provide either (question and context) or (inputs)\");\n}"
     }
   ]
 }
@@ -102,8 +106,36 @@ POST /_plugins/_ml/models/<MODEL_ID>/_predict
 
 Replace `<MODEL_ID>` with your deployed model ID.
 
+### 5. Test Batch Inference (OpenSearch 3.3+)
+
+```json
+POST /_plugins/_ml/models/<MODEL_ID>/_predict
+{
+  "parameters": {
+    "inputs": [
+      {
+        "question": "What are the symptoms of heart failure?",
+        "context": "Heart failure symptoms include shortness of breath, swelling in the feet and ankles, fatigue, and irregular pulse. Patients may also experience difficulty sleeping flat in bed."
+      },
+      {
+        "question": "What causes high blood pressure?",
+        "context": "High blood pressure can be caused by various factors including genetics, poor diet, lack of exercise, and stress. Sodium intake and obesity are major contributors."
+      },
+      {
+        "question": "How is diabetes managed?",
+        "context": "Diabetes management involves monitoring blood sugar levels, maintaining a healthy diet, regular exercise, and medication when necessary. Insulin therapy may be required for some patients."
+      }
+    ]
+  }
+}
+```
+
+Replace `<MODEL_ID>` with your deployed model ID.
+
 ## Example Response
 
+### Single Document Response
+
 ```json
 {
   "inference_results": [
@@ -126,8 +158,46 @@ Replace `<MODEL_ID>` with your deployed model ID.
 }
 ```
 
+### Batch Inference Response
+
+```json
+{
+  "inference_results": [
+    {
+      "output": [
+        {
+          "highlights": [
+            {
+              "start": 0,
+              "end": 145
+            }
+          ]
+        },
+        {
+          "highlights": [
+            {
+              "start": 62,
+              "end": 134
+            }
+          ]
+        },
+        {
+          "highlights": [
+            {
+              "start": 0,
+              "end": 108
+            }
+          ]
+        }
+      ],
+      "status_code": 200
+    }
+  ]
+}
+```
+
 ## References
-- [Deploying OpenSearch Sentence Highlighter Model To AWS SageMaker Guide](https://github.com/opensearch-project/opensearch-py-ml/docs/source/examples/aws_sagemaker_sentence_highlighter_model/README.md)
+- [Deploying OpenSearch Sentence Highlighter Model To AWS SageMaker Guide](https://github.com/opensearch-project/opensearch-py-ml/blob/main/docs/source/examples/semantic_highlighting/README.md)
 - [Using OpenSearch Semantic Highlighting Guide](https://docs.opensearch.org/docs/latest/tutorials/vector-search/semantic-highlighting-tutorial/)
 - [OpenSearch ML Commons Documentation](https://opensearch.org/docs/latest/ml-commons-plugin/remote-models/index/)
 - [SageMaker Endpoints Documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/deploy-model.html)