Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
e1609c7
Support script project pushdown
LantaoJin Jul 25, 2025
46e27ff
Merge remote-tracking branch 'upstream/main' into issues/3387
LantaoJin Jul 26, 2025
e563b5b
Fix IT
LantaoJin Jul 28, 2025
efef12c
Merge branch 'main' into pr/issues/3387
songkant-aws Sep 4, 2025
f197c4d
Resolve compile issue and use derived field script
songkant-aws Sep 9, 2025
c693625
Merge branch 'main' into project-pushdown
songkant-aws Sep 9, 2025
d6061e9
Fix derived field pushdown with correct project names and setup
songkant-aws Sep 9, 2025
65973d8
Fix pushed derived field name key conflicts with index fields issue
songkant-aws Sep 10, 2025
bb9cb2c
Merge branch 'main' into project-pushdown
songkant-aws Sep 11, 2025
688b5c9
Exclude some cases that are not supported
songkant-aws Sep 12, 2025
2794e1c
Merge branch 'main' into project-pushdown
songkant-aws Sep 12, 2025
34aa546
Exclude agg pushdown case after script project pushdown
songkant-aws Sep 12, 2025
5f1b61d
Refactor the code a bit
songkant-aws Sep 12, 2025
85f5b3e
Fix UT
songkant-aws Sep 12, 2025
c59f683
Merge branch 'main' into project-pushdown
songkant-aws Sep 17, 2025
80b5888
Revert some unnecessary change
songkant-aws Sep 17, 2025
33e2fb7
Merge branch 'main' into project-pushdown
songkant-aws Sep 18, 2025
9389e43
Merge pushDownProject and pushDownScriptPorject methods
songkant-aws Sep 17, 2025
ab7a5ce
Fix some bugs and correct explained plans
songkant-aws Sep 21, 2025
89837e1
Fix sort by simple expression after script project pushdown
songkant-aws Sep 21, 2025
e511b0b
Fix some logic for derived_existing fields
songkant-aws Sep 21, 2025
81739cb
Remove some unused methods
songkant-aws Sep 22, 2025
3642f10
Merge branch 'main' into project-pushdown
songkant-aws Sep 22, 2025
8056146
Fix spotless check
songkant-aws Sep 22, 2025
8f0af84
Add more test cases
songkant-aws Sep 23, 2025
c7e154b
Minor fixes to save script length
songkant-aws Sep 23, 2025
5e9ec59
Fix no pushdown IT
songkant-aws Sep 23, 2025
ec6ca9f
Merge branch 'main' into project-pushdown
songkant-aws Sep 24, 2025
b4815b4
Enable valid UDT type in script project pushdown
songkant-aws Sep 24, 2025
29882e9
Merge branch 'main' into project-pushdown
songkant-aws Oct 13, 2025
01e38a2
Merge branch 'main' into project-pushdown
songkant-aws Oct 13, 2025
9bc1fc3
Correct explained plans after merge
songkant-aws Oct 13, 2025
62827f4
Fix spotless check
songkant-aws Oct 13, 2025
68cd3c7
Support float type project pushdown
songkant-aws Oct 13, 2025
8210764
Fix expected result of testDivide
songkant-aws Oct 15, 2025
037476a
Merge branch 'main' into project-pushdown
songkant-aws Oct 15, 2025
1da8670
Address comments and relax pushdown row count factor
songkant-aws Oct 16, 2025
3f046f3
Fix explain cost test
songkant-aws Oct 16, 2025
3e501b2
Merge branch 'main' into project-pushdown
songkant-aws Oct 17, 2025
72d4481
Add more test cases
songkant-aws Oct 17, 2025
e36d38b
Transform test json files to yaml files
songkant-aws Oct 17, 2025
70afed0
Minor change to instantiate single instance of remap index visitor
songkant-aws Oct 17, 2025
bc91275
Add an IT to check query correctness
songkant-aws Oct 17, 2025
25e61d8
Fix spotless check
songkant-aws Oct 19, 2025
7b0032f
Merge branch 'main' into project-pushdown
songkant-aws Oct 21, 2025
da07146
Minor change of remapping expression in pushdownProject
songkant-aws Oct 21, 2025
e1dd9e7
Resolve conflicts after merge
songkant-aws Oct 22, 2025
2010ea8
Merge branch 'main' into project-pushdown
songkant-aws Oct 22, 2025
dd22241
Fix IT after merge
songkant-aws Oct 22, 2025
988a465
Add penalty cost if derived field script exceeds cap
songkant-aws Oct 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,30 @@
import static org.opensearch.sql.executor.QueryType.PPL;
import static org.opensearch.sql.lang.PPLLangSpec.PPL_SPEC;

import com.google.common.collect.ImmutableList;
import java.lang.reflect.Type;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.Predicate;
import lombok.Getter;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.SqlCollation;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.sql.calcite.type.AbstractExprRelDataType;
import org.opensearch.sql.calcite.type.ExprBinaryType;
import org.opensearch.sql.calcite.type.ExprDateType;
import org.opensearch.sql.calcite.type.ExprIPType;
import org.opensearch.sql.calcite.type.ExprSqlType;
import org.opensearch.sql.calcite.type.ExprTimeStampType;
import org.opensearch.sql.calcite.type.ExprTimeType;
import org.opensearch.sql.data.model.ExprValue;
Expand All @@ -68,6 +73,22 @@ private OpenSearchTypeFactory(RelDataTypeSystem typeSystem) {
super(typeSystem);
}

private static final List<SqlTypeName> FLOAT_TYPES =
ImmutableList.of(SqlTypeName.FLOAT, SqlTypeName.REAL);
private static final List<Pair<Predicate<RelDataType>, String>>
SUPPORTED_DSL_DERIVED_FIELD_TYPE_RULES =
Arrays.asList(
Pair.of(
t -> t instanceof ExprSqlType && ((ExprSqlType) t).getJavaType() == String.class,
"keyword"),
Pair.of(t -> SqlTypeName.INT_TYPES.contains(t.getSqlTypeName()), "long"),
// TODO: Support BigDecimal and other complex objects. A workaround is to wrap it in
// JSON object so that response can parse it
Pair.of(t -> SqlTypeName.DOUBLE.equals(t.getSqlTypeName()), "double"),
Pair.of(t -> FLOAT_TYPES.contains(t.getSqlTypeName()), "float"),
Pair.of(t -> SqlTypeName.BOOLEAN_TYPES.contains(t.getSqlTypeName()), "boolean"),
Pair.of(t -> SqlTypeName.CHAR_TYPES.contains(t.getSqlTypeName()), "keyword"));

@Getter
public enum ExprUDT {
EXPR_DATE(DATE),
Expand Down Expand Up @@ -337,4 +358,21 @@ public Type getJavaClass(RelDataType type) {
public static boolean isUserDefinedType(RelDataType type) {
return type instanceof AbstractExprRelDataType<?>;
}

public static boolean isTypeSupportedForDerivedField(RelDataType type) {
return SUPPORTED_DSL_DERIVED_FIELD_TYPE_RULES.stream()
.anyMatch(pair -> pair.getKey().test(type));
}

public static String convertRelDataTypeToSupportedDerivedFieldType(RelDataType type) {
return SUPPORTED_DSL_DERIVED_FIELD_TYPE_RULES.stream()
.filter(pair -> pair.getKey().test(type))
.findFirst()
.map(Pair::getValue)
.orElseThrow(
() ->
new IllegalArgumentException(
String.format(
Locale.ROOT, "Unsupported RelDataType for derived field: %s", type)));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.FixMethodOrder;
import org.junit.Test;
Expand All @@ -26,6 +27,12 @@ public void init() throws Exception {
super.init();
loadIndex(Index.CLICK_BENCH);
disableCalcite();
setMaxScriptFields("hits", 100);
}

@After
public void afterTest() throws IOException {
resetMaxScriptFields("hits");
}

@AfterClass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -627,15 +627,15 @@ public void testExplainRegexMatchInWhereWithScriptPushdown() throws IOException
}

@Test
public void testExplainRegexMatchInEvalWithOutScriptPushdown() throws IOException {
public void testExplainRegexMatchInEvalWithScriptPushdown() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
String query =
String.format(
"source=%s |eval has_hello = regex_match(name, 'hello') | fields has_hello",
TEST_INDEX_STRINGS);
var result = explainQueryToString(query);
String expected = loadFromFile("expectedOutput/calcite/explain_regex_match_in_eval.json");
assertJsonEqualsIgnoreId(expected, result);
var result = explainQueryYaml(query);
String expected = loadFromFile("expectedOutput/calcite/explain_regex_match_in_eval.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

// Only for Calcite
Expand Down Expand Up @@ -754,18 +754,18 @@ public void testRegexNegatedExplain() throws IOException {
public void testSimpleSortExpressionPushDownExplain() throws Exception {
String query =
"source=opensearch-sql_test_index_bank| eval age2 = age + 2 | sort age2 | fields age, age2";
var result = explainQueryToString(query);
String expected = loadExpectedPlan("explain_simple_sort_expr_push.json");
assertJsonEqualsIgnoreId(expected, result);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_simple_sort_expr_push.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testSimpleSortExpressionPushDownWithOnlyExprProjected() throws Exception {
String query =
"source=opensearch-sql_test_index_bank| eval b = balance + 1 | sort b | fields b";
var result = explainQueryToString(query);
String expected = loadExpectedPlan("explain_simple_sort_expr_single_expr_output_push.json");
assertJsonEqualsIgnoreId(expected, result);
var result = explainQueryYaml(query);
String expected = loadExpectedPlan("explain_simple_sort_expr_single_expr_output_push.yaml");
assertYamlEqualsIgnoreId(expected, result);
}

@Test
Expand All @@ -778,6 +778,35 @@ public void testRexExplain() throws IOException {
assertYamlEqualsIgnoreId(expected, result);
}

@Test
public void testScriptProjectMultiplePush() throws IOException {
String expected = loadExpectedPlan("explain_script_project_multiple_push.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account"
+ "| eval age2 = age + 2"
+ "| where age2 > 20"
+ "| eval upper_name = upper(firstname)"
+ "| where firstname = \\\"John\\\""
+ "| rex field=lastname \\\"(?<initial>^[A-Z])\\\""
+ "| fields age2, upper_name, initial, lastname"));
}

// TODO: Pending implementation. For now, this test makes sure complex sort expression is not
// pushed.
@Test
public void testScriptSort() throws IOException {
String expected = loadExpectedPlan("explain_script_sort.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account"
+ "| eval derived = age + balance"
+ "| sort derived"
+ "| fields age, balance, derived"));
}

@Test
public void testExplainAppendCommand() throws IOException {
String expected = loadExpectedPlan("explain_append_command.json");
Expand Down Expand Up @@ -1038,19 +1067,19 @@ public void testExplainSortOnMetricsNoBucketNullable() throws IOException {

@Test
public void testExplainEvalMax() throws IOException {
String expected = loadExpectedPlan("explain_eval_max.json");
assertJsonEqualsIgnoreId(
String expected = loadExpectedPlan("explain_eval_max.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryToString(
explainQueryYaml(
"source=opensearch-sql_test_index_account | eval new = max(1, 2, 3, age, 'banana')"));
}

@Test
public void testExplainEvalMin() throws IOException {
String expected = loadExpectedPlan("explain_eval_min.json");
assertJsonEqualsIgnoreId(
String expected = loadExpectedPlan("explain_eval_min.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryToString(
explainQueryYaml(
"source=opensearch-sql_test_index_account | eval new = min(1, 2, 3, age, 'banana')"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,26 @@ public void testFieldsWithManyFields() throws IOException {
schema("employer", "string"));
}

@Test
public void testFieldsWithNameConflictDerivedFieldPushdown() throws IOException {
// All the ascending top 5 original age field values are 20. After the derived field
// calculation, they are changed to 22. This query should give 5 rows of correct fields.
String ppl =
String.format(
"source=%s | sort age | head 5 | eval age = age + 2 | where age = 22 | fields email,"
+ " age",
TEST_INDEX_ACCOUNT);
JSONObject result = executeQuery(ppl);
verifySchema(result, schema("email", "string"), schema("age", "bigint"));
verifyDataRows(
result,
rows("[email protected]", 22),
rows("[email protected]", 22),
rows("[email protected]", 22),
rows("[email protected]", 22),
rows("[email protected]", 22));
}

@Test
public void testTableWithManyFields() throws IOException {
JSONObject result =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,11 @@ public void testSignAndRound() throws IOException {
verifyDataRows(actual, rows("Hello", 30, 31));
}

// TODO: Handle half_float_number, scaled_float_number precision issue in script fetched docValue
/*
* ScriptDocValue will resolve "half_float" and "scaled_float" to be a precision lost double.
* Hence, it will affect some function's result. Considering it's a rare case, defer the fix for now.
*/
@Test
public void testDivide() throws IOException {
JSONObject actual =
Expand Down Expand Up @@ -377,12 +382,12 @@ public void testDivide() throws IOException {
2,
3,
1,
1.1774194,
isPushdownDisabled() ? 1.1774194 : 1.1775454,
2.0666666,
3.142857142857143,
3.142857142857143,
3.0,
2.4333334,
isPushdownDisabled() ? 2.4333334 : 2.4335938,
0.8225806704669051));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ private Settings defaultSettings() {
.put(Key.FIELD_TYPE_TOLERANCE, true)
.put(Key.CALCITE_ENGINE_ENABLED, true)
.put(Key.CALCITE_PUSHDOWN_ENABLED, false)
.put(Key.CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR, 0.9)
.put(Key.CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR, 0.5)
.put(Key.PATTERN_METHOD, "SIMPLE_PATTERN")
.put(Key.PATTERN_MODE, "LABEL")
.put(Key.PATTERN_MAX_SAMPLE_COUNT, 10)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public abstract class SQLIntegTestCase extends OpenSearchSQLRestTestCase {
Integer.parseInt(System.getProperty("defaultQueryBucketSize", "1000"));
public static final Integer DEFAULT_MAX_RESULT_WINDOW =
Integer.parseInt(System.getProperty("defaultMaxResultWindow", "10000"));
public static final Integer DEFAULT_MAX_SCRIPT_FIELDS =
Integer.parseInt(System.getProperty("defaultMaxScriptFields", "32"));

public boolean shouldResetQuerySizeLimit() {
return true;
Expand Down Expand Up @@ -191,6 +193,15 @@ protected void resetMaxResultWindow(String indexName) throws IOException {
indexName, "{ \"index\": { \"max_result_window\": " + DEFAULT_MAX_RESULT_WINDOW + " } }");
}

protected void setMaxScriptFields(String indexName, Integer value) throws IOException {
updateIndexSettings(indexName, "{ \"index\": { \"max_script_fields\":" + value + " } }");
}

protected void resetMaxScriptFields(String indexName) throws IOException {
updateIndexSettings(
indexName, "{ \"index\": { \"max_script_fields\": " + DEFAULT_MAX_SCRIPT_FIELDS + " } }");
}

/** Provide for each test to load test index, data and other setup work */
protected void init() throws Exception {
disableCalcite();
Expand Down
68 changes: 58 additions & 10 deletions integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,55 @@ public void testExplain() throws IOException {
+ "| fields age2"));
}

@Test
public void testScriptProjectPushDownExplain() throws IOException {
String expected = loadExpectedPlan("explain_script_project_push.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account"
+ "| eval age2 = age + 2, upper_name = upper(firstname)"
+ "| fields age2, upper_name, firstname, lastname"));
}

@Test
public void testScriptProjectHasLiteralPartialPushDownExplain() throws IOException {
String expected = loadExpectedPlan("explain_script_project_has_literal_partial_push.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account"
+ "| eval age2 = age + 2, upper_name = upper(firstname), age3 = 3"
+ "| fields age2, upper_name, gender, lastname, age3"));
}

// TODO: Optimize it with less scripts. For now, filter and two derived fields are all scripts.
// We can translate it to two derived script field with filter on the field result. Reduce from
// three scripts to two scripts with better cost computing logic
@Test
public void testScriptProjectPushDownWithFilterOnPushedFieldExplain() throws IOException {
String expected =
loadExpectedPlan("explain_script_project_push_with_filter_on_pushed_field.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account"
+ "| eval age2 = age + 2, upper_name = upper(firstname)"
+ "| where age2 > 20"
+ "| fields age2, upper_name, firstname, lastname"));
}

@Test
public void testScriptProjectWithNameConflict() throws IOException {
String expected = loadExpectedPlan("explain_script_project_name_conflict.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account"
+ "| eval age = age + 2"
+ "| fields age, lastname"));
}
Comment on lines +98 to +101
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems in the plan the new age field becomes age0. I'm curious where is it set back to name age

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems the column names in our final results are derived from the original plan(i.e. logical plan), so the final plan(i.e. physical plan) is allowed to produce a different row type as long as the types can match.


@Test
public void testFilterPushDownExplain() throws IOException {
String expected = loadExpectedPlan("explain_filter_push.yaml");
Expand Down Expand Up @@ -179,21 +228,20 @@ public void testSortWithCountPushDownExplain() throws IOException {

@Test
public void testSortWithDescPushDownExplain() throws IOException {
String expected = loadExpectedPlan("explain_sort_desc_push.json");
assertJsonEqualsIgnoreId(
String expected = loadExpectedPlan("explain_sort_desc_push.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryToString(
explainQueryYaml(
"source=opensearch-sql_test_index_account | sort age, - firstname desc | fields age,"
+ " firstname"));
}

@Test
public void testSortWithTypePushDownExplain() throws IOException {
String expected = loadExpectedPlan("explain_sort_type_push.json");
assertJsonEqualsIgnoreId(
String expected = loadExpectedPlan("explain_sort_type_push.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryToString(
"source=opensearch-sql_test_index_account | sort num(age) | fields age"));
explainQueryYaml("source=opensearch-sql_test_index_account | sort num(age) | fields age"));
}

@Test
Expand Down Expand Up @@ -373,10 +421,10 @@ public void testLimitWithMultipleOffsetPushdownExplain() throws IOException {

@Test
public void testFillNullPushDownExplain() throws IOException {
String expected = loadExpectedPlan("explain_fillnull_push.json");
assertJsonEqualsIgnoreId(
String expected = loadExpectedPlan("explain_fillnull_push.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryToString(
explainQueryYaml(
"source=opensearch-sql_test_index_account"
+ " | fillnull with -1 in age,balance | fields age, balance"));
}
Expand Down
Loading
Loading