Skip to content

Commit 0108c05

Browse files
committed
fix: correctly auto-generated on llm_result_path
Signed-off-by: Sicheng Song <[email protected]>
1 parent 1f0094f commit 0108c05

File tree

9 files changed

+908
-34
lines changed

9 files changed

+908
-34
lines changed
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.ml.common.utils;
7+
8+
import java.io.IOException;
9+
import java.util.Iterator;
10+
import java.util.Map;
11+
12+
import org.opensearch.OpenSearchParseException;
13+
14+
import com.fasterxml.jackson.databind.JsonNode;
15+
import com.fasterxml.jackson.databind.ObjectMapper;
16+
17+
import lombok.extern.log4j.Log4j2;
18+
19+
/**
20+
* Utility class for auto-generating JSONPath expressions from JSON Schema.
21+
*
22+
* This class analyzes the "output" schema from MLModel's modelInterface field
23+
* and generates a JSONPath expression to extract LLM text responses from
24+
* connector-specific dataAsMap structures.
25+
*
26+
* The generator looks for fields marked with the custom schema property
27+
* "x-llm-output": true to identify the target LLM text field.
28+
*
29+
* Example Usage:
30+
* <pre>
31+
* String outputSchema = model.getModelInterface().get("output");
32+
* String llmResultPath = LlmResultPathGenerator.generate(outputSchema);
33+
* // Returns: "$.choices[0].message.content" (for OpenAI)
34+
* // or: "$.content[0].text" (for Bedrock Claude)
35+
* </pre>
36+
*/
37+
@Log4j2
38+
public class LlmResultPathGenerator {
39+
40+
private static final ObjectMapper MAPPER = new ObjectMapper();
41+
42+
// Custom JSON Schema extension marker for LLM output fields
43+
private static final String LLM_OUTPUT_MARKER = "x-llm-output";
44+
45+
/**
46+
* Generates a JSONPath expression from the model's output schema.
47+
*
48+
* This method searches for fields marked with "x-llm-output": true in the schema.
49+
* It is designed to work with properly annotated schemas from supported models
50+
* (GPT-4o-mini, GPT-5, Claude 3.7+).
51+
*
52+
* If no marker is found, returns null and the caller should use a default fallback path.
53+
*
54+
* @param outputSchemaJson The JSON Schema string from model.interface.output
55+
* @return JSONPath expression (e.g., "$.choices[0].message.content"), or null if no marker found
56+
* @throws IOException if schema parsing fails
57+
* @throws OpenSearchParseException if schema structure is invalid
58+
*/
59+
public static String generate(String outputSchemaJson) throws IOException {
60+
if (outputSchemaJson == null || outputSchemaJson.trim().isEmpty()) {
61+
log.warn("Output schema is null or empty, cannot generate llm_result_path");
62+
return null;
63+
}
64+
65+
try {
66+
JsonNode schemaRoot = MAPPER.readTree(outputSchemaJson);
67+
68+
// Navigate to dataAsMap schema node using hardcoded path; if not found, search from root
69+
JsonNode searchRoot = navigateToDataAsMapSchema(schemaRoot);
70+
if (searchRoot == null) {
71+
log.debug("No dataAsMap schema found, searching from root");
72+
searchRoot = schemaRoot;
73+
}
74+
75+
// Search for LLM output field with x-llm-output marker
76+
String jsonPath = findLlmTextField(searchRoot, "$");
77+
78+
if (jsonPath == null) {
79+
log.warn("Could not find field with x-llm-output marker in schema");
80+
return null;
81+
}
82+
83+
log.debug("Generated llm_result_path: {}", jsonPath);
84+
return jsonPath;
85+
86+
} catch (Exception e) {
87+
log.error("Failed to generate llm_result_path from schema", e);
88+
throw new OpenSearchParseException("Schema parsing error: " + e.getMessage(), e);
89+
}
90+
}
91+
92+
/**
93+
* Navigates to the dataAsMap schema node using the rigid ModelTensorOutput structure.
94+
*
95+
* The path follows the serialization structure defined by:
96+
* - ModelTensorOutput.INFERENCE_RESULT_FIELD = "inference_results"
97+
* - ModelTensors.OUTPUT_FIELD = "output"
98+
* - ModelTensor.DATA_AS_MAP_FIELD = "dataAsMap"
99+
*
100+
* Schema path: properties.inference_results.items.properties.output.items.properties.dataAsMap
101+
*
102+
* @param schemaRoot The root schema node
103+
* @return The dataAsMap schema node if found, null otherwise
104+
*/
105+
private static JsonNode navigateToDataAsMapSchema(JsonNode schemaRoot) {
106+
if (schemaRoot == null || schemaRoot.isMissingNode()) {
107+
return null;
108+
}
109+
110+
// Follow the rigid ModelTensorOutput → ModelTensors → ModelTensor structure
111+
JsonNode dataAsMapSchema = schemaRoot
112+
.path("properties")
113+
.path("inference_results")
114+
.path("items")
115+
.path("properties")
116+
.path("output")
117+
.path("items")
118+
.path("properties")
119+
.path("dataAsMap");
120+
121+
return dataAsMapSchema.isMissingNode() ? null : dataAsMapSchema;
122+
}
123+
124+
/**
125+
* Recursively searches for the LLM text field marked with "x-llm-output": true.
126+
*
127+
* @param schemaNode The current schema node to search
128+
* @param currentPath The current JSONPath being built
129+
* @return JSONPath expression to the LLM text field, or null if not found
130+
*/
131+
private static String findLlmTextField(JsonNode schemaNode, String currentPath) {
132+
return findLlmTextFieldWithMarker(schemaNode, currentPath);
133+
}
134+
135+
/**
136+
* Searches ONLY for fields with explicit "x-llm-output": true marker.
137+
* Does NOT use any heuristic field name matching.
138+
*
139+
* @param schemaNode The current schema node to search
140+
* @param currentPath The current JSONPath being built
141+
* @return JSONPath expression if marker found, null otherwise
142+
*/
143+
private static String findLlmTextFieldWithMarker(JsonNode schemaNode, String currentPath) {
144+
if (schemaNode == null || schemaNode.isMissingNode()) {
145+
return null;
146+
}
147+
148+
// Check if this field has the x-llm-output marker
149+
JsonNode marker = schemaNode.get(LLM_OUTPUT_MARKER);
150+
if (marker != null && marker.isBoolean() && marker.asBoolean()) {
151+
return currentPath;
152+
}
153+
154+
// Get the type of this schema node
155+
JsonNode typeNode = schemaNode.get("type");
156+
String type = typeNode != null && typeNode.isTextual() ? typeNode.asText() : null;
157+
158+
// If it's an object, recursively search properties
159+
if ("object".equals(type) || schemaNode.has("properties")) {
160+
JsonNode properties = schemaNode.get("properties");
161+
if (properties != null && properties.isObject()) {
162+
Iterator<Map.Entry<String, JsonNode>> fields = properties.fields();
163+
while (fields.hasNext()) {
164+
Map.Entry<String, JsonNode> field = fields.next();
165+
String fieldName = field.getKey();
166+
JsonNode fieldSchema = field.getValue();
167+
168+
String newPath = currentPath + "." + fieldName;
169+
String result = findLlmTextFieldWithMarker(fieldSchema, newPath);
170+
if (result != null) {
171+
return result;
172+
}
173+
}
174+
}
175+
}
176+
177+
// If it's an array, navigate into items
178+
if ("array".equals(type) || schemaNode.has("items")) {
179+
JsonNode items = schemaNode.get("items");
180+
if (items != null) {
181+
String newPath = currentPath + "[0]";
182+
String result = findLlmTextFieldWithMarker(items, newPath);
183+
if (result != null) {
184+
return result;
185+
}
186+
}
187+
}
188+
189+
return null;
190+
}
191+
192+
/**
193+
* Validates that a generated JSONPath can be parsed and applied.
194+
*
195+
* This is a basic validation that checks if the path syntax is valid.
196+
* It does not validate against actual data.
197+
*
198+
* @param jsonPath The JSONPath expression to validate
199+
* @return true if the path appears valid, false otherwise
200+
*/
201+
public static boolean isValidJsonPath(String jsonPath) {
202+
if (jsonPath == null || jsonPath.trim().isEmpty()) {
203+
return false;
204+
}
205+
206+
// Basic validation: must start with $
207+
if (!jsonPath.startsWith("$")) {
208+
return false;
209+
}
210+
211+
// Check for balanced brackets
212+
int bracketCount = 0;
213+
for (char c : jsonPath.toCharArray()) {
214+
if (c == '[')
215+
bracketCount++;
216+
if (c == ']')
217+
bracketCount--;
218+
if (bracketCount < 0)
219+
return false;
220+
}
221+
222+
return bracketCount == 0;
223+
}
224+
225+
}

common/src/main/resources/model-interface-schemas/output/bedrock_anthropic_claude_use_system_prompt_output.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
"properties": {
3838
"type": {
3939
"type": "string"
40+
},
41+
"text": {
42+
"type": "string",
43+
"x-llm-output": true
4044
}
4145
},
4246
"required": ["type"],

common/src/main/resources/model-interface-schemas/output/openai_chat_completions_output.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@
7676
{
7777
"type": "null"
7878
}
79-
]
79+
],
80+
"x-llm-output": true
8081
}
8182
},
8283
"additionalProperties": true

0 commit comments

Comments
 (0)