diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index afe5df01cf1..8fad51092c5 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -967,19 +967,15 @@ void populate() { XOR, SqlStdOperatorTable.NOT_EQUALS, PPLTypeChecker.family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.BOOLEAN)); - // SqlStdOperatorTable.CASE.getOperandTypeChecker is null. We manually create a - // type checker - // for it. The second and third operands are required to be of the same type. If - // not, - // it will throw an IllegalArgumentException with information Can't find - // leastRestrictive type + // SqlStdOperatorTable.CASE.getOperandTypeChecker is null. We manually create a type checker + // for it. The second and third operands are required to be of the same type. If not, it will + // throw an IllegalArgumentException with information Can't find leastRestrictive type registerOperator( IF, SqlStdOperatorTable.CASE, PPLTypeChecker.family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ANY)); // Re-define the type checker for is not null, is present, and is null since - // their original - // type checker ANY isn't compatible with struct types. + // their original type checker ANY isn't compatible with struct types. registerOperator( IS_NOT_NULL, SqlStdOperatorTable.IS_NOT_NULL, diff --git a/docs/user/ppl/functions/condition.rst b/docs/user/ppl/functions/condition.rst index 6be77cd5f97..c4d52f74913 100644 --- a/docs/user/ppl/functions/condition.rst +++ b/docs/user/ppl/functions/condition.rst @@ -227,6 +227,14 @@ Argument type: all the supported data type, (NOTE : there is no comma before "el Return type: any +Limitations +>>>>>>>>>>> + +When each condition is a field comparison with a numeric literal and each result expression is a string literal, the query will be optimized as `range aggregations `_ if pushdown optimization is enabled. 
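+For illustration, a query such as::
+
+    source=accounts | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100') | stats avg(age) as avg_age by age_range
+
+may be pushed down as a keyed ``range`` aggregation roughly of the following shape (a sketch only; the exact request body is generated by the optimizer and may differ)::
+
+    "age_range": {
+      "range": {
+        "field": "age",
+        "ranges": [
+          {"key": "u30", "to": 30.0},
+          {"key": "u40", "from": 30.0, "to": 40.0},
+          {"key": "u100", "from": 40.0}
+        ],
+        "keyed": true
+      },
+      "aggregations": {
+        "avg_age": {"avg": {"field": "age"}}
+      }
+    }
+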
However, this optimization has the following limitations: + +- Null values of the evaluated field will not be grouped into any bucket of the range aggregation and will therefore be ignored +- When no ``else`` clause is specified, the implicit default bucket will use the string literal ``"null"`` as its key instead of producing actual NULL values + Example:: os> source=accounts | eval result = case(age > 35, firstname, age < 30, lastname else employer) | fields result, firstname, lastname, age, employer diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 182c1163752..15087d5d010 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -10,6 +10,7 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WORKER; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WORK_INFORMATION; @@ -18,6 +19,7 @@ import java.io.IOException; import java.util.Locale; +import org.junit.Assume; import org.junit.Ignore; import org.junit.Test; import org.opensearch.sql.ppl.ExplainIT; @@ -512,22 +514,6 @@ public void testExplainStatsWithSubAggregation() throws IOException { + " @timestamp, region")); } - @Test - public void bucketNullableNotSupportSubAggregation() throws IOException { - // TODO: Don't throw exception after addressing - // https://github.com/opensearch-project/sql/issues/4317 - // When bucketNullable is true, sub aggregation is not supported. Hence we cannot pushdown the - // aggregation in this query. Caused by issue - // https://github.com/opensearch-project/sql/issues/4317, - // bin aggregation on timestamp field won't work if not been push down. 
- enabledOnlyWhenPushdownIsEnabled(); - assertThrows( - Exception.class, - () -> - explainQueryToString( - "source=events | bin @timestamp bins=3 | stats count() by @timestamp, region")); - } - @Test public void testExplainBinWithSpan() throws IOException { String expected = loadExpectedPlan("explain_bin_span.yaml"); @@ -1169,4 +1155,127 @@ public void testPushDownMinOrMaxAggOnDerivedField() throws IOException { + "| stats MIN(balance2), MAX(balance2)", TEST_INDEX_ACCOUNT))); } + + @Test + public void testCasePushdownAsRangeQueryExplain() throws IOException { + // CASE 1: Range - Metric + // 1.1 Range - Metric + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_range_metric_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100') |" + + " stats avg(age) as avg_age by age_range", + TEST_INDEX_BANK))); + + // 1.2 Range - Metric (COUNT) + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_range_count_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age < 40, 'u40'" + + " else 'u100') | stats avg(age) by age_range", + TEST_INDEX_BANK))); + + // 1.3 Range - Range - Metric + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_range_range_metric_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100')," + + " balance_range = case(balance < 20000, 'medium' else 'high') | stats" + + " avg(balance) as avg_balance by age_range, balance_range", + TEST_INDEX_BANK))); + + // 1.4 Range - Metric (With null & discontinuous ranges) + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_range_metric_complex_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', (age >= 35 and age < 40) or age" + + " >= 80, '30-40 or >=80') | stats avg(balance) by age_range", + TEST_INDEX_BANK))); + + // 1.5 Should not be pushed because the range is not closed-open + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_case_cannot_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age <= 40, 'u40'" + + " else 'u100') | stats avg(age) as avg_age by age_range", + TEST_INDEX_BANK))); + + // 1.6 Should not be pushed as range query because the result expression is not a string + // literal. 
+ // Range aggregation keys must be strings + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_case_num_res_cannot_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 30, 30 else 100) | stats count() by" + + " age_range", + TEST_INDEX_BANK))); + + // CASE 2: Composite - Range - Metric + // 2.1 Composite (term) - Range - Metric + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_composite_range_metric_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats avg(balance)" + + " by state, age_range", + TEST_INDEX_BANK))); + + // 2.2 Composite (date histogram) - Range - Metric + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_composite_date_range_push.yaml"), + explainQueryYaml( + "source=opensearch-sql_test_index_time_data | eval value_range = case(value < 7000," + + " 'small' else 'large') | stats avg(value) by value_range, span(@timestamp," + + " 1h)")); + + // 2.3 Composite(2 fields) - Range - Metric (with count) + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_composite2_range_count_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats" + + " avg(balance), count() by age_range, state, gender", + TEST_INDEX_BANK))); + + // 2.4 Composite (2 fields) - Range - Range - Metric (with count) + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_composite2_range_range_count_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 35, 'u35' else 'a35'), balance_range =" + + " case(balance < 20000, 'medium' else 'high') | stats avg(balance) as" + + " avg_balance by age_range, balance_range, state", + TEST_INDEX_BANK))); + + // 2.5 Should not be pushed down as range query because case result expression is not constant + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_case_composite_cannot_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | eval age_range = case(age < 35, 'u35' else email) | stats avg(balance)" + + " as avg_balance by age_range, state", + TEST_INDEX_BANK))); + } + + @Test + public void testNestedAggregationsExplain() throws IOException { + // TODO: Remove after resolving: https://github.com/opensearch-project/sql/issues/4578 + Assume.assumeFalse( + "The query runs into error when pushdown is disabled due to bin's implementation", + isPushdownDisabled()); + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_composite_autodate_range_metric_push.yaml"), + explainQueryYaml( + String.format( + "source=%s | bin timestamp bins=3 | eval value_range = case(value < 7000, 'small'" + + " else 'great') | stats bucket_nullable=false avg(value), count() by" + + " timestamp, value_range, category", + TEST_INDEX_TIME_DATA))); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index 7e4425d3a41..b7e16d1da8b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -5,14 +5,20 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY_WITH_NULL; import static 
org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; +import static org.opensearch.sql.util.MatcherUtils.closeTo; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; import org.json.JSONObject; +import org.junit.Assume; import org.junit.jupiter.api.Test; import org.opensearch.client.Request; import org.opensearch.sql.legacy.TestsConstants; @@ -25,6 +31,10 @@ public void init() throws Exception { enableCalcite(); loadIndex(Index.WEBLOG); + loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.STATE_COUNTRY_WITH_NULL); + loadIndex(Index.BANK); + loadIndex(Index.OTELLOGS); appendDataForBadResponse(); } @@ -246,4 +256,269 @@ public void testCaseWhenInSubquery() throws IOException { rows("0.0.0.2", "GET", null, "4085", "500", "/shuttle/missions/sts-73/mission-sts-73.html"), rows("::3", "GET", null, "3985", "403", "/shuttle/countdown/countdown.html")); } + + @Test + public void testCaseCanBePushedDownAsRangeQuery() throws IOException { + // CASE 1: Range - Metric + // 1.1 Range - Metric + JSONObject actual1 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100') |" + + " stats avg(age) as avg_age by age_range", + TEST_INDEX_BANK)); + verifySchema(actual1, schema("avg_age", "double"), schema("age_range", "string")); + verifyDataRows(actual1, rows(28.0, "u30"), rows(35.0, "u40")); + + // 1.2 Range - Metric (COUNT) + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age < 40, 'u40'" + + " else 'u100') | stats avg(age) by age_range", + TEST_INDEX_BANK)); + verifySchema(actual2, schema("avg(age)", "double"), schema("age_range", "string")); + verifyDataRows(actual2, rows(28.0, "u30"), rows(35.0, "u40")); + + // 1.3 Range - Range - Metric + JSONObject actual3 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100')," + + " balance_range = case(balance < 20000, 'medium' else 'high') | stats" + + " avg(balance) as avg_balance by age_range, balance_range", + TEST_INDEX_BANK)); + verifySchema( + actual3, + schema("avg_balance", "double"), + schema("age_range", "string"), + schema("balance_range", "string")); + verifyDataRows( + actual3, + rows(32838.0, "u30", "high"), + closeTo(8761.333333333334, "u40", "medium"), + rows(42617.0, "u40", "high")); + + // 1.4 Range - Metric (With null & discontinuous ranges) + JSONObject actual4 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', (age >= 35 and age < 40) or age" + + " >= 80, '30-40 or >=80') | stats avg(balance) by age_range", + TEST_INDEX_BANK)); + verifySchema(actual4, schema("avg(balance)", "double"), schema("age_range", "string")); + // There's such a discrepancy because null cannot be the key for a range query + if (isPushdownDisabled()) { + verifyDataRows( + actual4, + rows(32838.0, "u30"), + rows(30497.0, null), + closeTo(20881.333333333332, "30-40 or >=80")); + } else { + verifyDataRows( + actual4, + rows(32838.0, "u30"), + rows(30497.0, "null"), + closeTo(20881.333333333332, "30-40 or >=80")); + } + + // 1.5 Should not be pushed because the range is not closed-open + JSONObject actual5 = + executeQuery( + String.format( + 
"source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age <= 40, 'u40'" + + " else 'u100') | stats avg(age) as avg_age by age_range", + TEST_INDEX_BANK)); + verifySchema(actual5, schema("avg_age", "double"), schema("age_range", "string")); + verifyDataRows(actual5, rows(35.0, "u40"), rows(28.0, "u30")); + } + + @Test + public void testCaseCanBePushedDownAsCompositeRangeQuery() throws IOException { + // CASE 2: Composite - Range - Metric + // 2.1 Composite (term) - Range - Metric + JSONObject actual6 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats avg(balance)" + + " by state, age_range", + TEST_INDEX_BANK)); + verifySchema( + actual6, + schema("avg(balance)", "double"), + schema("state", "string"), + schema("age_range", "string")); + verifyDataRows( + actual6, + rows(39225.0, "IL", "a30"), + rows(48086.0, "IN", "a30"), + rows(4180.0, "MD", "a30"), + rows(40540.0, "PA", "a30"), + rows(5686.0, "TN", "a30"), + rows(32838.0, "VA", "u30"), + rows(16418.0, "WA", "a30")); + + // 2.2 Composite (date histogram) - Range - Metric + JSONObject actual7 = + executeQuery( + "source=opensearch-sql_test_index_time_data | eval value_range = case(value < 7000," + + " 'small' else 'large') | stats avg(value) by value_range, span(@timestamp," + + " 1month)"); + verifySchema( + actual7, + schema("avg(value)", "double"), + schema("span(@timestamp,1month)", "timestamp"), + schema("value_range", "string")); + + verifyDataRows( + actual7, + closeTo(6642.521739130435, "2025-07-01 00:00:00", "small"), + closeTo(8381.917808219177, "2025-07-01 00:00:00", "large"), + rows(6489.0, "2025-08-01 00:00:00", "small"), + rows(8375.0, "2025-08-01 00:00:00", "large")); + + // 2.3 Composite(2 fields) - Range - Metric (with count) + JSONObject actual8 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats" + + " avg(balance), count() by age_range, state, gender", + TEST_INDEX_BANK)); + verifySchema( + actual8, + schema("avg(balance)", "double"), + schema("count()", "bigint"), + schema("age_range", "string"), + schema("state", "string"), + schema("gender", "string")); + verifyDataRows( + actual8, + rows(5686.0, 1, "a30", "TN", "M"), + rows(16418.0, 1, "a30", "WA", "M"), + rows(40540.0, 1, "a30", "PA", "F"), + rows(4180.0, 1, "a30", "MD", "M"), + rows(32838.0, 1, "u30", "VA", "F"), + rows(39225.0, 1, "a30", "IL", "M"), + rows(48086.0, 1, "a30", "IN", "F")); + + // 2.4 Composite (2 fields) - Range - Range - Metric (with count) + JSONObject actual9 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 35, 'u35' else 'a35'), balance_range =" + + " case(balance < 20000, 'medium' else 'high') | stats avg(balance) as" + + " avg_balance by age_range, balance_range, state", + TEST_INDEX_BANK)); + verifySchema( + actual9, + schema("avg_balance", "double"), + schema("age_range", "string"), + schema("balance_range", "string"), + schema("state", "string")); + verifyDataRows( + actual9, + rows(39225.0, "u35", "high", "IL"), + rows(48086.0, "u35", "high", "IN"), + rows(4180.0, "u35", "medium", "MD"), + rows(40540.0, "a35", "high", "PA"), + rows(5686.0, "a35", "medium", "TN"), + rows(32838.0, "u35", "high", "VA"), + rows(16418.0, "a35", "medium", "WA")); + + // 2.5 Should not be pushed because case result expression is not constant + JSONObject actual10 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 35, 'u35' else email) | stats avg(balance)" + + " as avg_balance by 
age_range, state", + TEST_INDEX_BANK)); + verifySchema( + actual10, + schema("avg_balance", "double"), + schema("age_range", "string"), + schema("state", "string")); + verifyDataRows( + actual10, + rows(32838.0, "u35", "VA"), + rows(4180.0, "u35", "MD"), + rows(48086.0, "u35", "IN"), + rows(40540.0, "virginiaayala@filodyne.com", "PA"), + rows(39225.0, "u35", "IL"), + rows(5686.0, "hattiebond@netagy.com", "TN"), + rows(16418.0, "elinorratliff@scentric.com", "WA")); + } + + @Test + public void testCaseAggWithNullValues() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s" + + "| eval age_category = case(" + + " age < 20, 'teenager'," + + " age < 70, 'adult'," + + " age >= 70, 'senior'" + + " else 'unknown')" + + "| stats avg(age) by age_category", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + verifySchema(actual, schema("avg(age)", "double"), schema("age_category", "string")); + // There is such discrepancy because range aggregations will ignore null values + if (isPushdownDisabled()) { + verifyDataRows( + actual, + rows(10, "teenager"), + rows(25, "adult"), + rows(70, "senior"), + rows(null, "unknown")); + } else { + verifyDataRows(actual, rows(10, "teenager"), rows(25, "adult"), rows(70, "senior")); + } + } + + @Test + public void testNestedCaseAggWithAutoDateHistogram() throws IOException { + // TODO: Remove after resolving: https://github.com/opensearch-project/sql/issues/4578 + Assume.assumeFalse( + "The query cannot be executed when pushdown is disabled due to implementation defects of" + + " the bin command", + isPushdownDisabled()); + JSONObject actual1 = + executeQuery( + String.format( + "source=%s | bin @timestamp bins=2 | eval severity_range = case(severityNumber <" + + " 16, 'minor' else 'severe') | stats avg(severityNumber), count() by" + + " @timestamp, severity_range, flags", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + actual1, + schema("avg(severityNumber)", "double"), + schema("count()", "bigint"), + schema("@timestamp", "timestamp"), + schema("severity_range", "string"), + schema("flags", "bigint")); + + verifyDataRows( + actual1, + rows(8.85, 20, "2024-01-15 10:30:02", "minor", 0), + rows(20, 9, "2024-01-15 10:30:02", "severe", 0), + rows(9, 1, "2024-01-15 10:30:00", "minor", 1), + rows(17, 1, "2024-01-15 10:30:00", "severe", 1), + rows(1, 1, "2024-01-15 10:30:05", "minor", 1)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | bin @timestamp bins=100 | eval severity_range = case(severityNumber <" + + " 16, 'minor' else 'severe') | stats avg(severityNumber), count() by" + + " @timestamp, severity_range, flags", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + actual2, + schema("avg(severityNumber)", "double"), + schema("count()", "bigint"), + schema("@timestamp", "timestamp"), + schema("severity_range", "string"), + schema("flags", "bigint")); + verifyNumOfRows(actual2, 32); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml new file mode 100644 index 00000000000..d04bbd2df44 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_age=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg_age=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], 
age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAe3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0BXZ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAidTMwIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJTRUFSQ0giLAogICAgICAgICJraW5kIjogIlNFQVJDSCIsCiAgICAgICAgInN5bnRheCI6ICJJTlRFUk5BTCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogewogICAgICAgICAgICAicmFuZ2VTZXQiOiBbCiAgICAgICAgICAgICAgWwogICAgICAgICAgICAgICAgImNsb3NlZCIsCiAgICAgICAgICAgICAgICAiMzAiLAogICAgICAgICAgICAgICAgIjQwIgogICAgICAgICAgICAgIF0KICAgICAgICAgICAgXSwKICAgICAgICAgICAgIm51bGxBcyI6ICJVTktOT1dOIgogICAgICAgICAgfSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogInU0MCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MTAwIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AANhZ2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdJTlRFR0VSeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml new file mode 100644 index 00000000000..82cbadeb735 --- /dev/null +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$2], age_range=[$0], state=[$1]) + LogicalAggregate(group=[{0, 1}], avg_balance=[AVG($2)]) + LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA5nsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1haWwiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQCe3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMSwKICAgICAgIm5hbWUiOiAiJDEiCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAACdAADYWdlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAHSU5URUdFUnQABWVtYWlsc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+ABB4cH5xAH4ACnQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgALdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AG3hwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AHQAAAABzcQB+AAAAAAADdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml new file mode 100644 index 00000000000..9502c66a448 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(age_range=[CASE(<($10, 30), 30, 100)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAe3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Ap17CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAzMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAxMDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AANhZ2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdJTlRFR0VSeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml new file mode 100644 index 00000000000..353bcf5c1e9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], 
type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$3], count()=[$4], age_range=[$0], state=[$1], gender=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg(balance)=[AVG($3)], count()=[COUNT()]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg(balance)=AVG($3),count()=COUNT()), PROJECT->[avg(balance), count(), age_range, state, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"a30","from":30.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml new file mode 100644 index 00000000000..eef2a7b23f8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$3], age_range=[$0], balance_range=[$1], state=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg_balance=[AVG($3)]) + LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, 'a35':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], state=[$9], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg_balance=AVG($3)), PROJECT->[avg_balance, age_range, balance_range, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u35","to":35.0},{"key":"a35","from":35.0}],"keyed":true},"aggregations":{"balance_range":{"range":{"field":"balance","ranges":[{"key":"medium","to":20000.0},{"key":"high","from":20000.0}],"keyed":true},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml new file mode 100644 index 00000000000..dccce23e18b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + 
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(value)=[$3], count()=[$4], timestamp=[$0], value_range=[$1], category=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg(value)=[AVG($3)], count()=[COUNT()]) + LogicalProject(timestamp=[$9], value_range=[$10], category=[$1], value=[$2]) + LogicalFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($1))]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], timestamp=[WIDTH_BUCKET($3, 3, -(MAX($3) OVER (), MIN($3) OVER ()), MAX($3) OVER ())], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'great':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2, 3},avg(value)=AVG($1),count()=COUNT()), PROJECT->[avg(value), count(), timestamp, value_range, category], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml new file mode 100644 index 00000000000..30e4762d325 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(value)=[$2], span(@timestamp,1h)=[$1], value_range=[$0]) + LogicalAggregate(group=[{0, 2}], avg(value)=[AVG($1)]) + LogicalProject(value_range=[$10], value=[$2], span(@timestamp,1h)=[SPAN($0, 1, 'h')]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml new file mode 100644 index 00000000000..065598bc82c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$2], state=[$0], age_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), state, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"a30","from":30.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_count_push.yaml new file mode 100644 index 00000000000..498786a6aef --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_count_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(age)=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg(age)=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40)]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(age)=AVG($1)), PROJECT->[avg(age), age_range]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"u40","from":30.0,"to":40.0},{"key":"u100","from":40.0}],"keyed":true},"aggregations":{"avg(age)":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_complex_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_complex_push.yaml new file mode 100644 index 00000000000..f3d749487c0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_complex_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + 
LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[35..40), [80..+∞)]), '30-40 or >=80':VARCHAR, null:NULL)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), PROJECT->[avg(balance), age_range]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"30-40 or >=80","from":35.0,"to":40.0},{"key":"30-40 or >=80","from":80.0},{"key":"null","from":30.0,"to":35.0},{"key":"null","from":40.0,"to":80.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_push.yaml new file mode 100644 index 00000000000..ee0a5ce9448 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_age=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg_age=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, <($10, 40), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"u40","from":30.0,"to":40.0},{"key":"u100","from":40.0}],"keyed":true},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_range_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_range_metric_push.yaml new file mode 100644 index 00000000000..5b44ebfdc68 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_range_metric_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$2], age_range=[$0], balance_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg_balance=[AVG($2)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, <($10, 40), 'u40':VARCHAR, 'u100':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, balance_range]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"u40","from":30.0,"to":40.0},{"key":"u100","from":40.0}],"keyed":true},"aggregations":{"balance_range":{"range":{"field":"balance","ranges":[{"key":"medium","to":20000.0},{"key":"high","from":20000.0}],"keyed":true},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml index e56eb5ad662..77fc6c6eadf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(gender=[$4], account_number=[$0]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT(),count(account_number)=COUNT($1)), PROJECT->[count(), count(account_number), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}},"count(account_number)":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT(),count(account_number)=COUNT($1)), PROJECT->[count(), count(account_number), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(account_number)":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time.yaml index b3f3f5aed9b..5c7850cdf5e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time.yaml @@ -7,5 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[0], expr#3=[>($t1, $t2)], count()=[$t1], @timestamp=[$t0], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT())], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time2.yaml index a0080e88f90..4efe38c96d1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time2.yaml @@ -7,5 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], avg(cpu_usage)=[$t1], @timestamp=[$t0], $condition=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(cpu_usage)=AVG($1))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(cpu_usage)=AVG($1)), PROJECT->[avg(cpu_usage), @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml index 8d3e77e622e..14cf8e6db82 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(environment=[$0], status_code=[$2], service=[$3], host=[$4], memory_usage=[$5], response_time=[$6], cpu_usage=[$7], region=[$8], bytes_sent=[$9], _id=[$10], _index=[$11], _score=[$12], _maxscore=[$13], _sort=[$14], _routing=[$15], @timestamp=[WIDTH_BUCKET($1, 3, -(MAX($1) OVER (), MIN($1) OVER ()), MAX($1) OVER ())]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), @timestamp, region], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"region":{"terms":{"field":"region","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":{"_key":"asc"}},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml index ffc24ee8939..1dc48f5a550 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(environment=[$0], status_code=[$2], service=[$3], host=[$4], memory_usage=[$5], response_time=[$6], cpu_usage=[$7], region=[$8], bytes_sent=[$9], _id=[$10], _index=[$11], _score=[$12], _maxscore=[$13], _sort=[$14], _routing=[$15], @timestamp=[WIDTH_BUCKET($1, 3, -(MAX($1) OVER (), MIN($1) OVER ()), MAX($1) OVER ())]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 2},avg(cpu_usage)=AVG($0)), PROJECT->[avg(cpu_usage), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"region":{"terms":{"field":"region","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":{"_key":"asc"}},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 2},avg(cpu_usage)=AVG($0)), PROJECT->[avg(cpu_usage), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_cannot_push.yaml new file mode 100644 index 
00000000000..f8fd80e1598 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_cannot_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_age=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg_age=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg_age=[$t8], age_range=[$t0]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[Sarg[[30..40]]], expr#23=[SEARCH($t10, $t22)], expr#24=['u40':VARCHAR], expr#25=['u100':VARCHAR], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], age_range=[$t26], age=[$t10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_composite_cannot_push.yaml new file mode 100644 index 00000000000..059caa2e2d2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_composite_cannot_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$2], age_range=[$0], state=[$1]) + LogicalAggregate(group=[{0, 1}], avg_balance=[AVG($2)]) + LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg_balance=[$t9], age_range=[$t0], state=[$t1]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[35], expr#20=[<($t10, $t19)], expr#21=['u35':VARCHAR], expr#22=[CASE($t20, $t21, $t11)], age_range=[$t22], state=[$t9], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_num_res_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_num_res_cannot_push.yaml new file mode 100644 index 00000000000..46035abe925 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_num_res_cannot_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(age_range=[CASE(<($10, 30), 30, 100)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], age_range=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + 
EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=[100], expr#22=[CASE($t20, $t19, $t21)], age_range=[$t22]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_count_push.yaml new file mode 100644 index 00000000000..43e27cd2d5d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_count_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$3], count()=[$4], age_range=[$0], state=[$1], gender=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg(balance)=[AVG($3)], count()=[COUNT()]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t4, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t3)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t4)], avg(balance)=[$t11], count()=[$t5], age_range=[$t0], state=[$t1], gender=[$t2]) + EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($3)], agg#1=[COUNT($3)], count()=[COUNT()]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=['a30':VARCHAR], expr#23=[CASE($t20, $t21, $t22)], age_range=[$t23], state=[$t9], gender=[$t4], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_range_count_push.yaml new file mode 100644 index 00000000000..6dfa7cd65a3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_range_count_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$3], age_range=[$0], balance_range=[$1], state=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg_balance=[AVG($3)]) + LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, 'a35':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], state=[$9], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], avg_balance=[$t10], age_range=[$t0], balance_range=[$t1], state=[$t2]) + EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($3)], agg#1=[COUNT($3)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[35], expr#20=[<($t10, $t19)], expr#21=['u35':VARCHAR], expr#22=['a35':VARCHAR], expr#23=[CASE($t20, $t21, $t22)], expr#24=[20000], expr#25=[<($t7, $t24)], expr#26=['medium':VARCHAR], expr#27=['high':VARCHAR], expr#28=[CASE($t25, $t26, $t27)], age_range=[$t23], balance_range=[$t28], state=[$t9], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml new file mode 100644 index 00000000000..f99713d9aaa --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(value)=[$2], span(@timestamp,1h)=[$1], value_range=[$0]) + LogicalAggregate(group=[{0, 2}], avg(value)=[AVG($1)]) + LogicalProject(value_range=[$10], value=[$2], span(@timestamp,1h)=[SPAN($0, 1, 'h')]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg(value)=[$t9], span(@timestamp,1h)=[$t1], value_range=[$t0]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[7000], expr#11=[<($t2, $t10)], expr#12=['small':VARCHAR], expr#13=['large':VARCHAR], expr#14=[CASE($t11, $t12, $t13)], expr#15=[1], expr#16=['h'], expr#17=[SPAN($t0, $t15, $t16)], expr#18=[IS NOT NULL($t0)], value_range=[$t14], value=[$t2], span(@timestamp,1h)=[$t17], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_range_metric_push.yaml new file mode 100644 index 00000000000..41ed8ba61fc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_range_metric_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$2], state=[$0], age_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg(balance)=[$t9], state=[$t0], age_range=[$t1]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=['a30':VARCHAR], expr#23=[CASE($t20, $t21, $t22)], state=[$t9], age_range=[$t23], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_count_push.yaml new file mode 100644 index 00000000000..67ad0f0fd07 --- /dev/null +++ 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_count_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(age)=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg(age)=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40)]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(age)=[$t8], age_range=[$t0]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[Sarg[[30..40)]], expr#23=[SEARCH($t10, $t22)], expr#24=['u40':VARCHAR], expr#25=['u100':VARCHAR], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], age_range=[$t26], age=[$t10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_complex_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_complex_push.yaml new file mode 100644 index 00000000000..10ead7ad449 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_complex_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[35..40), [80..+∞)]), '30-40 or >=80':VARCHAR, null:NULL)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(balance)=[$t8], age_range=[$t0]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[Sarg[[35..40), [80..+∞)]], expr#23=[SEARCH($t10, $t22)], expr#24=['30-40 or >=80':VARCHAR], expr#25=[null:NULL], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], age_range=[$t26], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_push.yaml new file mode 100644 index 00000000000..a81e208bdbf --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_age=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg_age=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, <($10, 40), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + 
EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg_age=[$t8], age_range=[$t0]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[40], expr#23=[<($t10, $t22)], expr#24=['u40':VARCHAR], expr#25=['u100':VARCHAR], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], age_range=[$t26], age=[$t10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_range_metric_push.yaml new file mode 100644 index 00000000000..404726f6083 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_range_metric_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$2], age_range=[$0], balance_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg_balance=[AVG($2)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, <($10, 40), 'u40':VARCHAR, 'u100':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg_balance=[$t9], age_range=[$t0], balance_range=[$t1]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[40], expr#23=[<($t10, $t22)], expr#24=['u40':VARCHAR], expr#25=['u100':VARCHAR], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], expr#27=[20000], expr#28=[<($t7, $t27)], expr#29=['medium':VARCHAR], expr#30=['high':VARCHAR], expr#31=[CASE($t28, $t29, $t30)], age_range=[$t26], balance_range=[$t31], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4201.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4201.yml new file mode 100644 index 00000000000..d90fa438a81 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4201.yml @@ -0,0 +1,110 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + + - do: + indices.create: + index: test + body: + mappings: + properties: + "@timestamp": + type: date + timestamp: + type: date + size: + type: long + tmin: + type: double + metrics: + type: object + properties: + size: + type: long + tmin: + type: double + + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T00:00:00Z", "timestamp": "2025-01-01T00:00:00Z", "size": -20, "tmin": 1.0, "metrics": { "size": -20, "tmin": 1.0 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T01:00:00Z", "timestamp": "2025-01-01T01:00:00Z", "size": 5, "tmin": 2.5, "metrics": { "size": 5, "tmin": 2.5 } }' + - '{"index": {}}' + - '{ 
"@timestamp": "2025-01-01T02:00:00Z", "timestamp": "2025-01-01T02:00:00Z", "size": 50, "tmin": 3.2, "metrics": { "size": 50, "tmin": 3.2 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T03:00:00Z", "timestamp": "2025-01-01T03:00:00Z", "size": 500, "tmin": 1.8, "metrics": { "size": 500, "tmin": 1.8 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T04:00:00Z", "timestamp": "2025-01-01T04:00:00Z", "size": 1500, "tmin": 4.1, "metrics": { "size": 1500, "tmin": 4.1 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T05:00:00Z", "timestamp": "2025-01-01T05:30:00Z", "size": 3000, "tmin": 2.9, "metrics": { "size": 3000, "tmin": 2.9 } }' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"Test aggregation by range bucket": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source = test + | eval range_bucket = case( + `metrics.size` < -10, 'range_1', + `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', + `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', + `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', + `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', + `metrics.size` >= 2000, 'range_6' + ) + | stats min(`metrics.tmin`) as tmin, avg(`metrics.size`) as tavg, max(`metrics.size`) as tmax + by range_bucket + + - match: { total: 6 } + - match: { schema: [{"name": "tmin", "type": "double"}, {"name": "tavg", "type": "double"}, {"name": "tmax", "type": "bigint"}, {"name": "range_bucket", "type": "string"}] } + - match: { datarows: [[1.0, -20.0, -20, "range_1"], [2.5, 5.0, 5, "range_2"], [3.2, 50.0, 50, "range_3"], [1.8, 500.0, 500, "range_4"], [4.1, 1500.0, 1500, "range_5"], [2.9, 3000.0, 3000, "range_6"]] } + +--- +"Test aggregation by range bucket and time span": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source = test + | eval range_bucket = case( + `metrics.size` < -10, 'range_1', + `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', + `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', + `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', + `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', + `metrics.size` >= 2000, 'range_6' + ) + | stats min(`metrics.tmin`) as tmin, avg(`metrics.size`) as tavg, max(`metrics.size`) as tmax + by range_bucket, span(`@timestamp`, 1h) + + - match: { total: 6 } + - match: { schema: [{"name": "tmin", "type": "double"}, {"name": "tavg", "type": "double"}, {"name": "tmax", "type": "bigint"}, {"name": "span(`@timestamp`,1h)", "type": "timestamp"}, {"name": "range_bucket", "type": "string"}] } + - match: { datarows: [[1.0, -20.0, -20, "2025-01-01 00:00:00", "range_1"], [2.5, 5.0, 5, "2025-01-01 01:00:00", "range_2"], [3.2, 50.0, 50, "2025-01-01 02:00:00", "range_3"], [1.8, 500.0, 500, "2025-01-01 03:00:00", "range_4"], [4.1, 1500.0, 1500, "2025-01-01 04:00:00", "range_5"], [2.9, 3000.0, 3000, "2025-01-01 05:00:00", "range_6"]] } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 036a052f0a1..82ee2d84520 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -37,8 +37,11 @@ import 
com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; import java.util.function.Function; import lombok.RequiredArgsConstructor; import org.apache.calcite.plan.RelOptCluster; @@ -62,6 +65,7 @@ import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.metrics.ExtendedStats; import org.opensearch.search.aggregations.metrics.PercentilesAggregationBuilder; @@ -80,7 +84,6 @@ import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.CountAsTotalHitsParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; @@ -89,7 +92,6 @@ import org.opensearch.sql.opensearch.response.agg.SingleValueParser; import org.opensearch.sql.opensearch.response.agg.StatsParser; import org.opensearch.sql.opensearch.response.agg.TopHitsParser; -import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.BucketAggregationBuilder; import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.CompositeAggregationBuilder; /** @@ -124,12 +126,6 @@ public static class ExpressionNotAnalyzableException extends Exception { } } - public static class CompositeAggUnSupportedException extends RuntimeException { - CompositeAggUnSupportedException(String message) { - super(message); - } - } - private AggregateAnalyzer() {} @RequiredArgsConstructor @@ -206,16 +202,20 @@ public static Pair, OpenSearchAggregationResponseParser Pair> builderAndParser = processAggregateCalls(aggFieldNames, aggregate.getAggCallList(), project, helper); Builder metricBuilder = builderAndParser.getLeft(); - List metricParserList = builderAndParser.getRight(); + List metricParsers = builderAndParser.getRight(); // both count() and count(FIELD) can apply doc_count optimization in non-bucket aggregation, // but only count() can apply doc_count optimization in bucket aggregation. 
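// For example (an illustrative note): `stats count() by gender` can read each bucket's doc_count
// directly, whereas `stats count(balance) by gender` cannot, because count(FIELD) must skip
// documents where the field is null while doc_count does not.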
- boolean countAllOnly = !aggregate.getGroupSet().isEmpty(); + boolean countAllOnly = !groupList.isEmpty(); Pair, Builder> countAggNameAndBuilderPair = removeCountAggregationBuilders(metricBuilder, countAllOnly); Builder newMetricBuilder = countAggNameAndBuilderPair.getRight(); List countAggNames = countAggNameAndBuilderPair.getLeft(); + // No group-by clause -- no parent aggregations are attached: + // - stats count() + // - stats avg(), count() + // Metric if (aggregate.getGroupSet().isEmpty()) { if (newMetricBuilder == null) { // The optimization must require all count aggregations are removed, @@ -224,38 +224,58 @@ public static Pair, OpenSearchAggregationResponseParser } else { return Pair.of( ImmutableList.copyOf(newMetricBuilder.getAggregatorFactories()), - new NoBucketAggregationParser(metricParserList)); - } - } else if (aggregate.getGroupSet().length() == 1 - && isAutoDateSpan(project.getProjects().get(groupList.getFirst()))) { - ValuesSourceAggregationBuilder bucketBuilder = createBucket(0, project, helper); - if (newMetricBuilder != null) { - bucketBuilder.subAggregations(newMetricBuilder); + new NoBucketAggregationParser(metricParsers)); } - return Pair.of( - Collections.singletonList(bucketBuilder), - new BucketAggregationParser(metricParserList, countAggNames)); } else { - AggregationBuilder aggregationBuilder; - try { + // Used to track the current sub-builder as analysis progresses + Builder subBuilder = newMetricBuilder; + // Push auto date span & case in group-by list into nested aggregations + Pair, AggregationBuilder> aggPushedAndAggBuilder = + createNestedAggregation(groupList, project, subBuilder, helper); + Set aggPushed = aggPushedAndAggBuilder.getLeft(); + AggregationBuilder pushedAggBuilder = aggPushedAndAggBuilder.getRight(); + // The group-by list after removing pushed aggregations + groupList = groupList.stream().filter(i -> !aggPushed.contains(i)).toList(); + if (pushedAggBuilder != null) { + subBuilder = new Builder().addAggregator(pushedAggBuilder); + } + + // No composite aggregation at top-level -- auto date span & case in group-by list are + // pushed into nested aggregations: + // - stats avg() by range_field + // - stats count() by auto_date_span + // - stats count() by ...auto_date_spans, ...range_fields + // [AutoDateHistogram | RangeAgg]+ + // Metric + if (groupList.isEmpty()) { + return Pair.of( + ImmutableList.copyOf(subBuilder.getAggregatorFactories()), + new BucketAggregationParser(metricParsers, countAggNames)); + } + // Composite aggregation at top level -- it has composite aggregation, with or without its + // incompatible value sources as sub-aggregations: + // - stats avg() by term_fields + // - stats avg() by date_histogram + // - stats count() by auto_date_span, range_field, term_fields + // CompositeAgg + // [AutoDateHistogram | RangeAgg]* + // Metric + else { List> buckets = createCompositeBuckets(groupList, project, helper); - aggregationBuilder = - AggregationBuilders.composite("composite_buckets", buckets).size(bucketSize); - if (newMetricBuilder != null) { - aggregationBuilder.subAggregations(metricBuilder); + if (buckets.size() != groupList.size()) { + throw new UnsupportedOperationException( + "Not all the left aggregations can be converted to value sources of composite" + + " aggregation"); } - return Pair.of( - Collections.singletonList(aggregationBuilder), - new CompositeAggregationParser(metricParserList, countAggNames)); - } catch (CompositeAggUnSupportedException e) { - if (bucketNullable) { - throw new 
UnsupportedOperationException(e.getMessage()); + AggregationBuilder compositeBuilder = + AggregationBuilders.composite("composite_buckets", buckets).size(bucketSize); + if (subBuilder != null) { + compositeBuilder.subAggregations(subBuilder); } - aggregationBuilder = createNestedBuckets(groupList, project, newMetricBuilder, helper); return Pair.of( - Collections.singletonList(aggregationBuilder), - new BucketAggregationParser(metricParserList, countAggNames)); + Collections.singletonList(compositeBuilder), + new BucketAggregationParser(metricParsers, countAggNames)); } } } catch (Throwable e) { @@ -529,22 +549,106 @@ private static List> createCompositeBuckets( return resultBuilder.build(); } - private static ValuesSourceAggregationBuilder createNestedBuckets( + /** + * Creates nested bucket aggregations for expressions that are not qualified as value sources for + * composite aggregations. + * + *

<p>This method processes a list of group by expressions and identifies those that cannot be + * used as value sources in composite aggregations but can be pushed down as sub-aggregations, + * such as auto date histograms and range buckets. + + *
<p>The aggregation hierarchy follows this pattern: + + *
<pre>
+   * AutoDateHistogram | RangeAggregation
+   *   └── AutoDateHistogram | RangeAggregation (nested)
+   *       └── ... (more composite-incompatible aggregations)
+   *           └── Metric Aggregation (at the bottom)
+   * </pre>
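+   *
+   * <p>As a rough sketch (based on the expected-output fixtures in this change, with the ranges
+   * array elided), a query such as {@code source=bank | stats avg(balance) by age_range}, where
+   * {@code age_range} is a CASE expression over {@code age}, pushes down approximately:
+   *
+   * <pre>{@code
+   * "age_range": {
+   *   "range": { "field": "age", "ranges": [...], "keyed": true },
+   *   "aggregations": { "avg(balance)": { "avg": { "field": "balance" } } }
+   * }
+   * }</pre>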
+ * + * @param groupList the list of group by field indices from the query + * @param project the projection containing the expressions to analyze + * @param metricBuilder the metric aggregation builder to be placed at the bottom of the hierarchy + * @param helper the aggregation builder helper containing row type and utility methods + * @return a pair containing: + *
<ul>
+ *   <li>A set of integers representing the indices of group fields that were successfully
+ *       pushed as sub-aggregations
+ *   <li>The root aggregation builder, or null if no such expressions were found
+ * </ul>
+ */ + private static Pair, AggregationBuilder> createNestedAggregation( List groupList, Project project, Builder metricBuilder, AggregateAnalyzer.AggregateBuilderHelper helper) { - ValuesSourceAggregationBuilder rootAgg = createBucket(groupList.get(0), project, helper); - ValuesSourceAggregationBuilder currentAgg = rootAgg; - for (int i = 1; i < groupList.size(); i++) { - ValuesSourceAggregationBuilder nextAgg = createBucket(groupList.get(i), project, helper); - currentAgg.subAggregations(new AggregatorFactories.Builder().addAggregator(nextAgg)); - currentAgg = nextAgg; + AggregationBuilder rootAggBuilder = null; + AggregationBuilder tailAggBuilder = null; + + Set aggPushed = new HashSet<>(); + for (Integer i : groupList) { + RexNode agg = project.getProjects().get(i); + String name = project.getNamedProjects().get(i).getValue(); + AggregationBuilder aggBuilder = createCompositeIncompatibleAggregation(agg, name, helper); + if (aggBuilder != null) { + aggPushed.add(i); + if (rootAggBuilder == null) { + rootAggBuilder = aggBuilder; + } else { + tailAggBuilder.subAggregation(aggBuilder); + } + tailAggBuilder = aggBuilder; + } } - if (metricBuilder != null) { - currentAgg.subAggregations(metricBuilder); + if (tailAggBuilder != null && metricBuilder != null) { + tailAggBuilder.subAggregations(metricBuilder); + } + return Pair.of(aggPushed, rootAggBuilder); + } + + /** + * Creates an aggregation builder for expressions that are not qualified as composite aggregation + * value sources. + * + *

<p>This method analyzes RexNode expressions and creates appropriate OpenSearch aggregation + * builders for cases where they cannot be value sources of a composite aggregation. + + *
<p>The method supports the following aggregation types: + + *
<pre>
+   * - Auto Date Histogram Aggregation: For temporal bucketing with automatic interval selection
+   * - Range Aggregation: For CASE expressions that define value ranges
+   * </pre>
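+   *
+   * <p>For instance (an illustrative sketch, not an exhaustive contract): a projection such as
+   * {@code WIDTH_BUCKET(@timestamp, 3, ...)} becomes an auto_date_histogram with three buckets,
+   * while {@code CASE(age < 30, 'u30', 'a30')} is delegated to {@code CaseRangeAnalyzer} and, if
+   * it qualifies, becomes a range aggregation on {@code age}.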
+ * + * @param agg the RexNode expression to analyze and convert + * @param name the name to assign to the created aggregation builder + * @param helper the aggregation builder helper containing row type and utility methods + * @return the appropriate ValuesSourceAggregationBuilder for the expression, or null if no + * compatible aggregation type is found + */ + private static ValuesSourceAggregationBuilder createCompositeIncompatibleAggregation( + RexNode agg, String name, AggregateBuilderHelper helper) { + ValuesSourceAggregationBuilder aggBuilder = null; + if (isAutoDateSpan(agg)) { + aggBuilder = analyzeAutoDateSpan(agg, name, helper); + } else if (isCase(agg)) { + Optional rangeAggBuilder = + CaseRangeAnalyzer.create(name, helper.rowType).analyze((RexCall) agg); + if (rangeAggBuilder.isPresent()) { + aggBuilder = rangeAggBuilder.get(); + } } - return rootAgg; + return aggBuilder; + } + + private static AutoDateHistogramAggregationBuilder analyzeAutoDateSpan( + RexNode spanAgg, String name, AggregateAnalyzer.AggregateBuilderHelper helper) { + RexCall rexCall = (RexCall) spanAgg; + RexInputRef rexInputRef = (RexInputRef) rexCall.getOperands().getFirst(); + RexLiteral valueLiteral = (RexLiteral) rexCall.getOperands().get(1); + return new AutoDateHistogramAggregationBuilder(name) + .field(helper.inferNamedField(rexInputRef).getRootName()) + .setNumBuckets(requireNonNull(valueLiteral.getValueAs(Integer.class))); } private static boolean isAutoDateSpan(RexNode rex) { @@ -553,39 +657,14 @@ private static boolean isAutoDateSpan(RexNode rex) { && rexCall.getOperator().equals(WIDTH_BUCKET); } - private static ValuesSourceAggregationBuilder createBucket( - Integer groupIndex, Project project, AggregateBuilderHelper helper) { - RexNode rex = project.getProjects().get(groupIndex); - String bucketName = project.getRowType().getFieldList().get(groupIndex).getName(); - if (rex instanceof RexCall rexCall - && rexCall.getKind() == SqlKind.OTHER_FUNCTION - && rexCall.getOperator().getName().equalsIgnoreCase(BuiltinFunctionName.SPAN.name()) - && rexCall.getOperands().size() == 3 - && rexCall.getOperands().getFirst() instanceof RexInputRef rexInputRef - && rexCall.getOperands().get(1) instanceof RexLiteral valueLiteral - && rexCall.getOperands().get(2) instanceof RexLiteral unitLiteral) { - return BucketAggregationBuilder.buildHistogram( - bucketName, - helper.inferNamedField(rexInputRef).getRootName(), - valueLiteral.getValueAs(Double.class), - SpanUnit.of(unitLiteral.getValueAs(String.class))); - } else if (isAutoDateSpan(rex)) { - RexCall rexCall = (RexCall) rex; - RexInputRef rexInputRef = (RexInputRef) rexCall.getOperands().getFirst(); - RexLiteral valueLiteral = (RexLiteral) rexCall.getOperands().get(1); - return new AutoDateHistogramAggregationBuilder(bucketName) - .field(helper.inferNamedField(rexInputRef).getRootName()) - .setNumBuckets(requireNonNull(valueLiteral.getValueAs(Integer.class))); - } else { - return createTermsAggregationBuilder(bucketName, rex, helper); - } + private static boolean isCase(RexNode rex) { + return rex instanceof RexCall rexCall && rexCall.getKind() == SqlKind.CASE; } private static CompositeValuesSourceBuilder createCompositeBucket( - Integer groupIndex, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) - throws CompositeAggUnSupportedException { + Integer groupIndex, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) { RexNode rex = project.getProjects().get(groupIndex); - String bucketName = 
project.getRowType().getFieldList().get(groupIndex).getName(); + String bucketName = project.getRowType().getFieldNames().get(groupIndex); if (rex instanceof RexCall rexCall && rexCall.getKind() == SqlKind.OTHER_FUNCTION && rexCall.getOperator().getName().equalsIgnoreCase(BuiltinFunctionName.SPAN.name()) @@ -600,9 +679,6 @@ private static CompositeValuesSourceBuilder createCompositeBucket( SpanUnit.of(unitLiteral.getValueAs(String.class)), MissingOrder.FIRST, helper.bucketNullable); - } else if (isAutoDateSpan(rex)) { - throw new CompositeAggUnSupportedException( - "auto_date_histogram is not supported in composite agg."); } else { return createTermsSourceBuilder(bucketName, rex, helper); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java new file mode 100644 index 00000000000..104ab04e547 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java @@ -0,0 +1,289 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.request; + +import com.google.common.collect.BoundType; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.TreeRangeSet; +import java.math.BigDecimal; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlBinaryOperator; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.util.Sarg; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; + +/** + * Analyzer to detect CASE expressions that can be converted to OpenSearch range aggregations. + * + *

<p>Strict validation rules: + + *
<ul>
+ *   <li>All conditions must compare the same field with literals
+ *   <li>Only closed-open, at-least, and less-than ranges are allowed
+ *   <li>Return values must be string literals
+ * </ul>
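+ *
+ * <p>Illustrative sketch (the field and bucket labels are borrowed from the test fixtures): the
+ * projection {@code CASE(age < 30, 'u30', age < 40, 'u40', 'u100')} qualifies and maps to a range
+ * aggregation on {@code age} with buckets {@code u30 = (-inf, 30)}, {@code u40 = [30, 40)} and
+ * {@code u100 = [40, +inf)}, while a CASE whose conditions compare two different fields does not
+ * qualify.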
+ */ +public class CaseRangeAnalyzer { + /** The default key to use if there isn't a key specified for the else case */ + public static final String DEFAULT_ELSE_KEY = "null"; + + private final RelDataType rowType; + private final RangeSet takenRange; + private final RangeAggregationBuilder builder; + + public CaseRangeAnalyzer(String name, RelDataType rowType) { + this.rowType = rowType; + this.takenRange = TreeRangeSet.create(); + this.builder = AggregationBuilders.range(name).keyed(true); + } + + /** + * Creates a new CaseRangeAnalyzer instance. + * + * @param rowType the row type information for field resolution + * @return a new CaseRangeAnalyzer instance + */ + public static CaseRangeAnalyzer create(String name, RelDataType rowType) { + return new CaseRangeAnalyzer(name, rowType); + } + + /** + * Analyzes a CASE expression to determine if it can be converted to a range aggregation. + * + * @param caseCall The CASE RexCall to analyze + * @return Optional RangeAggregationBuilder if conversion is possible, empty otherwise + */ + public Optional analyze(RexCall caseCall) { + if (!caseCall.getKind().equals(SqlKind.CASE)) { + return Optional.empty(); + } + + List operands = caseCall.getOperands(); + + // Process WHEN-THEN pairs + for (int i = 0; i < operands.size() - 1; i += 2) { + RexNode condition = operands.get(i); + RexNode expr = operands.get(i + 1); + try { + String key = parseLiteralAsString(expr); + analyzeCondition(condition, key); + } catch (UnsupportedOperationException e) { + return Optional.empty(); + } + } + + // Check ELSE clause + RexNode elseExpr = operands.getLast(); + String elseKey; + if (RexLiteral.isNullLiteral(elseExpr)) { + // range key doesn't support values of type: VALUE_NULL + elseKey = DEFAULT_ELSE_KEY; + } else { + try { + elseKey = parseLiteralAsString(elseExpr); + } catch (UnsupportedOperationException e) { + return Optional.empty(); + } + } + addRangeSet(elseKey, takenRange.complement()); + return Optional.of(builder); + } + + /** Analyzes a single condition in the CASE WHEN clause. */ + private void analyzeCondition(RexNode condition, String key) { + if (!(condition instanceof RexCall)) { + throwUnsupported("condition must be a RexCall"); + } + + RexCall call = (RexCall) condition; + SqlKind kind = call.getKind(); + + // Handle simple comparisons + if (kind == SqlKind.GREATER_THAN_OR_EQUAL + || kind == SqlKind.LESS_THAN + || kind == SqlKind.LESS_THAN_OR_EQUAL + || kind == SqlKind.GREATER_THAN) { + analyzeSimpleComparison(call, key); + } else if (kind == SqlKind.SEARCH) { + analyzeSearchCondition(call, key); + } + // AND / OR will only appear when users try to create a complex condition on multiple fields + // E.g. (a > 3 and b < 5). Otherwise, the complex conditions will be converted to a SEARCH call. 
+ else if (kind == SqlKind.AND || kind == SqlKind.OR) { + throwUnsupported("Range queries must be performed on the same field"); + } else { + throwUnsupported("Can not analyze condition as a range query: " + call); + } + } + + private void analyzeSimpleComparison(RexCall call, String key) { + List operands = call.getOperands(); + if (operands.size() != 2 || !(call.getOperator() instanceof SqlBinaryOperator)) { + throwUnsupported(); + } + RexNode left = operands.get(0); + RexNode right = operands.get(1); + SqlOperator operator = call.getOperator(); + RexInputRef inputRef = null; + RexLiteral literal = null; + + // Swap inputRef to the left if necessary + if (left instanceof RexInputRef && right instanceof RexLiteral) { + inputRef = (RexInputRef) left; + literal = (RexLiteral) right; + } else if (left instanceof RexLiteral && right instanceof RexInputRef) { + inputRef = (RexInputRef) right; + literal = (RexLiteral) left; + operator = operator.reverse(); + } else { + throwUnsupported(); + } + + if (operator == null) { + throwUnsupported(); + } + + String fieldName = rowType.getFieldNames().get(inputRef.getIndex()); + if (builder.field() == null) { + builder.field(fieldName); + } else if (!Objects.equals(builder.field(), fieldName)) { + throwUnsupported("comparison must be performed on the same field"); + } + + Double value = literal.getValueAs(Double.class); + if (value == null) { + throwUnsupported("Cannot parse value for comparison"); + } + switch (operator.getKind()) { + case GREATER_THAN_OR_EQUAL -> { + addFrom(key, value); + } + case LESS_THAN -> { + addTo(key, value); + } + default -> throw new UnsupportedOperationException( + "ranges must be equivalents of field >= constant or field < constant"); + } + ; + } + + private void analyzeSearchCondition(RexCall searchCall, String key) { + RexNode field = searchCall.getOperands().getFirst(); + if (!(field instanceof RexInputRef)) { + throwUnsupported("Range query must be performed on a field"); + } + String fieldName = getFieldName((RexInputRef) field); + if (builder.field() == null) { + builder.field(fieldName); + } else if (!Objects.equals(builder.field(), fieldName)) { + throwUnsupported("Range query must be performed on the same field"); + } + RexLiteral literal = (RexLiteral) searchCall.getOperands().getLast(); + Sarg sarg = Objects.requireNonNull(literal.getValueAs(Sarg.class)); + for (Range r : sarg.rangeSet.asRanges()) { + @SuppressWarnings("unchecked") + Range range = (Range) r; + validateRange(range); + if (!range.hasLowerBound() && range.hasUpperBound()) { + // It will be Double.MAX_VALUE if be big decimal is greater than Double.MAX_VALUE + double upper = range.upperEndpoint().doubleValue(); + addTo(key, upper); + } else if (range.hasLowerBound() && !range.hasUpperBound()) { + double lower = range.lowerEndpoint().doubleValue(); + addFrom(key, lower); + } else if (range.hasLowerBound()) { + double lower = range.lowerEndpoint().doubleValue(); + double upper = range.upperEndpoint().doubleValue(); + addBetween(key, lower, upper); + } else { + addBetween(key, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); + } + } + } + + private void addFrom(String key, Double value) { + var from = Range.atLeast(value); + updateRange(key, from); + } + + private void addTo(String key, Double value) { + var to = Range.lessThan(value); + updateRange(key, to); + } + + private void addBetween(String key, Double from, Double to) { + var range = Range.closedOpen(from, to); + updateRange(key, range); + } + + private void updateRange(String key, Range 
range) { + // The range to add: remaining space ∩ new range + RangeSet toAdd = takenRange.complement().subRangeSet(range); + addRangeSet(key, toAdd); + takenRange.add(range); + } + + // Add range set without updating taken range + private void addRangeSet(String key, RangeSet rangeSet) { + rangeSet.asRanges().forEach(range -> addRange(key, range)); + } + + // Add range without updating taken range + private void addRange(String key, Range range) { + validateRange(range); + if (range.hasLowerBound() && range.hasUpperBound()) { + builder.addRange(key, range.lowerEndpoint(), range.upperEndpoint()); + } else if (range.hasLowerBound()) { + builder.addUnboundedFrom(key, range.lowerEndpoint()); + } else if (range.hasUpperBound()) { + builder.addUnboundedTo(key, range.upperEndpoint()); + } else { + builder.addRange(key, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); + } + } + + private String getFieldName(RexInputRef field) { + return rowType.getFieldNames().get(field.getIndex()); + } + + private static void validateRange(Range range) { + if ((range.hasLowerBound() && range.lowerBoundType() != BoundType.CLOSED) + || (range.hasUpperBound() && range.upperBoundType() != BoundType.OPEN)) { + throwUnsupported("Range query only supports closed-open ranges"); + } + } + + private static String parseLiteralAsString(RexNode node) { + if (!(node instanceof RexLiteral)) { + throwUnsupported("Result expressions of range queries must be literals"); + } + RexLiteral literal = (RexLiteral) node; + try { + return literal.getValueAs(String.class); + } catch (AssertionError ignore) { + } + throw new UnsupportedOperationException( + "Cannot parse result expression of type " + literal.getType()); + } + + private static void throwUnsupported() { + throw new UnsupportedOperationException("Cannot create range aggregator from case"); + } + + private static void throwUnsupported(String message) { + throw new UnsupportedOperationException("Cannot create range aggregator: " + message); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/AbstractBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/AbstractBucketAggregationParser.java new file mode 100644 index 00000000000..1098ccd4666 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/AbstractBucketAggregationParser.java @@ -0,0 +1,63 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.response.agg; + +import java.util.List; +import java.util.Map; +import org.opensearch.search.SearchHits; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; +import org.opensearch.search.aggregations.bucket.range.Range; + +/** + * Abstract base class for parsing bucket aggregations from OpenSearch responses. Provides common + * functionality for extracting key-value pairs from different types of buckets. + */ +public abstract class AbstractBucketAggregationParser + implements OpenSearchAggregationResponseParser { + /** + * Extracts key-value pairs from a composite aggregation bucket without processing its + * sub-aggregations. + * + *

<p>For example, for the following CompositeAggregation bucket in response: + + *
<pre>{@code
+   * {
+   *   "key": {
+   *     "firstname": "William",
+   *     "lastname": "Shakespeare"
+   *   },
+   *   "sub_agg_name": {
+   *     "buckets": []
+   *   }
+   * }
+   * }</pre>
+ * + * It returns {@code {"firstname": "William", "lastname": "Shakespeare"}} as the response. + * + * @param bucket the composite aggregation bucket to extract data from + * @return a map containing the bucket's key-value pairs + */ + protected Map extract(CompositeAggregation.Bucket bucket) { + return bucket.getKey(); + } + + /** + * Extracts key-value pairs from a range aggregation bucket without processing its + * sub-aggregations. + * + * @param bucket the range aggregation bucket to extract data from + * @param name the name to use as the key in the returned map + * @return a map containing the bucket's key mapped to the provided name + */ + protected Map extract(Range.Bucket bucket, String name) { + return Map.of(name, bucket.getKey()); + } + + @Override + public List> parse(SearchHits hits) { + throw new UnsupportedOperationException(this.getClass() + " doesn't support parse(SearchHits)"); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index f9395976625..5fde477fd06 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -8,11 +8,18 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import lombok.EqualsAndHashCode; +import lombok.Getter; import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; +import org.opensearch.search.aggregations.bucket.histogram.InternalAutoDateHistogram; +import org.opensearch.search.aggregations.bucket.range.Range; +import org.opensearch.search.aggregations.bucket.terms.ParsedStringTerms; /** * Use BucketAggregationParser only when there is a single group-by key, it returns multiple @@ -20,7 +27,7 @@ */ @EqualsAndHashCode public class BucketAggregationParser implements OpenSearchAggregationResponseParser { - private final MetricParserHelper metricsParser; + @Getter private final MetricParserHelper metricsParser; // countAggNameList dedicated the list of count aggregations which are filled by doc_count private List countAggNameList = List.of(); @@ -44,20 +51,28 @@ public List> parse(Aggregations aggregations) { return ((MultiBucketsAggregation) agg) .getBuckets().stream() .map(b -> parseBucket(b, agg.getName())) + .filter(Objects::nonNull) .flatMap(List::stream) .toList(); } private List> parseBucket( MultiBucketsAggregation.Bucket bucket, String name) { + // return null so that an empty bucket of range or date span will be filtered out + if (bucket instanceof Range.Bucket || bucket instanceof InternalAutoDateHistogram.Bucket) { + if (bucket.getDocCount() == 0) { + return null; + } + } + Aggregations aggregations = bucket.getAggregations(); List> results = isLeafAgg(aggregations) ? 
parseLeafAgg(aggregations, bucket.getDocCount()) : parse(aggregations); - for (Map r : results) { - r.put(name, bucket.getKey()); - } + + Optional> common = extract(bucket, name); + common.ifPresent(commonMap -> results.forEach(r -> r.putAll(commonMap))); return results; } @@ -77,4 +92,48 @@ public List> parse(SearchHits hits) { throw new UnsupportedOperationException( "BucketAggregationParser doesn't support parse(SearchHits)"); } + + /** + * Extracts key-value pairs from different types of aggregation buckets without processing their + * sub-aggregations. + * + *

<p>For CompositeAggregation buckets, it extracts all key-value pairs from the bucket's key. For + * example, for the following CompositeAggregation bucket in response: + + *
<pre>{@code
+   * {
+   *   "key": {
+   *     "firstname": "William",
+   *     "lastname": "Shakespeare"
+   *   },
+   *   "sub_agg_name": {
+   *     "buckets": []
+   *   }
+   * }
+   * }</pre>
+ * + * It returns {@code {"firstname": "William", "lastname": "Shakespeare"}}. + * + *

For Range buckets, it creates a single key-value pair using the provided name and the + * bucket's key. + * + * @param bucket the aggregation bucket to extract data from + * @param name the field name to use for range buckets (ignored for composite buckets) + * @return an Optional containing the extracted key-value pairs, or empty if bucket type is + * unsupported + */ + protected Optional> extract( + MultiBucketsAggregation.Bucket bucket, String name) { + Map extracted; + if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { + extracted = compositeBucket.getKey(); + } else if (bucket instanceof Range.Bucket + || bucket instanceof InternalAutoDateHistogram.Bucket + || bucket instanceof ParsedStringTerms.ParsedBucket) { + extracted = Map.of(name, bucket.getKey()); + } else { + extracted = null; + } + return Optional.ofNullable(extracted); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index d50bba29b7c..56a926f509e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -9,7 +9,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.stream.Collectors; import lombok.Getter; import org.apache.calcite.plan.Convention; @@ -26,7 +25,6 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.hint.RelHint; -import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -34,13 +32,10 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.type.SqlTypeName; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; -import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder; -import org.opensearch.search.aggregations.metrics.ValueCountAggregationBuilder; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.type.ExprCoreType; @@ -310,24 +305,6 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, Project project) { extendedTypeMapping, outputFields.subList(0, aggregate.getGroupSet().cardinality())); newScan.pushDownContext.add(PushDownType.AGGREGATION, aggregate, action); - if (aggregationBuilder.getLeft().size() == 1 - && aggregationBuilder.getLeft().getFirst() - instanceof AutoDateHistogramAggregationBuilder autoDateHistogram) { - // If it's auto_date_histogram, filter the empty bucket by using the first aggregate metrics - RexBuilder rexBuilder = getCluster().getRexBuilder(); - Optional aggBuilderOpt = - autoDateHistogram.getSubAggregations().stream().toList().stream().findFirst(); - RexNode condition = - aggBuilderOpt.isEmpty() || aggBuilderOpt.get() instanceof ValueCountAggregationBuilder - ? 
rexBuilder.makeCall( - SqlStdOperatorTable.GREATER_THAN, - rexBuilder.makeInputRef(newScan, 1), - rexBuilder.makeLiteral( - 0, rexBuilder.getTypeFactory().createSqlType(SqlTypeName.INTEGER))) - : rexBuilder.makeCall( - SqlStdOperatorTable.IS_NOT_NULL, rexBuilder.makeInputRef(newScan, 1)); - return LogicalFilter.create(newScan, condition); - } return newScan; } catch (Exception e) { if (LOG.isDebugEnabled()) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java index 408511fde3f..e11c5aa9728 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java @@ -29,7 +29,7 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -95,7 +95,7 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { bucketNullable)) .subAggregations(metrics.getLeft()) .size(AGGREGATION_BUCKET_SIZE)), - new CompositeAggregationParser(metrics.getRight())); + new BucketAggregationParser(metrics.getRight())); } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index 4953bdb7d9b..ae6e12aee07 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -46,7 +46,7 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; import org.opensearch.sql.opensearch.request.AggregateAnalyzer.ExpressionNotAnalyzableException; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.FilterParser; import org.opensearch.sql.opensearch.response.agg.MetricParserHelper; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; @@ -283,9 +283,10 @@ void analyze_groupBy() throws ExpressionNotAnalyzableException { + "{\"a\":{\"terms\":{\"field\":\"a\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}," + "{\"b\":{\"terms\":{\"field\":\"b.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]}}}]", result.getLeft().toString()); - assertInstanceOf(CompositeAggregationParser.class, result.getRight()); + assertInstanceOf(BucketAggregationParser.class, result.getRight()); + assertInstanceOf(BucketAggregationParser.class, result.getRight()); MetricParserHelper metricsParser = - ((CompositeAggregationParser) result.getRight()).getMetricsParser(); + ((BucketAggregationParser) 
result.getRight()).getMetricsParser(); assertEquals(1, metricsParser.getMetricParserMap().size()); metricsParser .getMetricParserMap() @@ -594,8 +595,11 @@ private Project createMockProject(List refIndex) { when(ref.getType()).thenReturn(typeFactory.createSqlType(SqlTypeName.INTEGER)); rexNodes.add(ref); } + List> namedProjects = + rexNodes.stream().map(n -> org.apache.calcite.util.Pair.of(n, n.toString())).toList(); when(project.getProjects()).thenReturn(rexNodes); when(project.getRowType()).thenReturn(rowType); + when(project.getNamedProjects()).thenReturn(namedProjects); return project; } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java new file mode 100644 index 00000000000..505db011f7b --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java @@ -0,0 +1,844 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.request; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.google.common.collect.Range; +import com.google.common.collect.TreeRangeSet; +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Optional; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeSystem; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUnknownAs; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeFactoryImpl; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.Sarg; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; + +class CaseRangeAnalyzerTest { + + private RelDataTypeFactory typeFactory; + private RexBuilder rexBuilder; + private RelDataType rowType; + private RexInputRef fieldRef; + + @BeforeEach + void setUp() { + typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + rexBuilder = new RexBuilder(typeFactory); + + // Create a row type with fields: age (INTEGER), name (VARCHAR) + rowType = + typeFactory + .builder() + .add("age", SqlTypeName.INTEGER) + .add("name", SqlTypeName.VARCHAR) + .build(); + + fieldRef = rexBuilder.makeInputRef(rowType.getFieldList().get(0).getType(), 0); // age field + } + + @Test + void testAnalyzeSimpleCaseExpression() { + // CASE + // WHEN age >= 18 THEN 'adult' + // WHEN age >= 13 THEN 'teen' + // ELSE 'child' + // END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral literal13 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(13)); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral teenLiteral = rexBuilder.makeLiteral("teen"); + RexLiteral childLiteral = rexBuilder.makeLiteral("child"); + + RexCall condition1 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + RexCall condition2 = + (RexCall) + 
rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal13); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(condition1, adultLiteral, condition2, teenLiteral, childLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_ranges", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + assertEquals("age_ranges", builder.getName()); + assertEquals("age", builder.field()); + + String expectedJson = + """ + { + "age_ranges" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "teen", + "from" : 13.0, + "to" : 18.0 + }, + { + "key" : "child", + "to" : 13.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeLessThanComparison() { + // CASE WHEN age < 18 THEN 'minor' ELSE 'adult' END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral minorLiteral = rexBuilder.makeLiteral("minor"); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + + RexCall condition = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, minorLiteral, adultLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_check", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "age_check" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "minor", + "to" : 18.0 + }, + { + "key" : "adult", + "from" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeWithSearchCondition() { + // Create a SEARCH condition (Sarg-based range) + TreeRangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(18), BigDecimal.valueOf(65))); + + Sarg sarg = Sarg.of(RexUnknownAs.UNKNOWN, rangeSet); + RexNode sargLiteral = + rexBuilder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + + RexCall searchCall = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.SEARCH, Arrays.asList(fieldRef, sargLiteral)); + + RexLiteral workingLiteral = rexBuilder.makeLiteral("working_age"); + RexLiteral otherLiteral = rexBuilder.makeLiteral("other"); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(searchCall, workingLiteral, otherLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_groups", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "age_groups" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "working_age", + "from" : 18.0, + "to" : 65.0 + }, + { + "key" : "other", + "to" : 18.0 + }, + { + "key" : "other", + "from" : 65.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeWithNullElse() { + // CASE WHEN age >= 18 THEN 'adult' ELSE NULL END + + RexLiteral literal18 = 
rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral nullLiteral = + rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.VARCHAR)); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, adultLiteral, nullLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_check", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + // Should use DEFAULT_ELSE_KEY for null else clause + + String expectedJson = + """ + { + "age_check" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "null", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeWithNonLiteralResultShouldNotSucceed() { + // CASE WHEN age >= 18 THEN age ELSE 0 END (non-literal result) + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral zeroLiteral = rexBuilder.makeExactLiteral(BigDecimal.valueOf(0)); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(condition, fieldRef, zeroLiteral)); // fieldRef as result, not literal + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertFalse(result.isPresent()); + } + + @Test + void testAnalyzeDifferentFieldsShouldReturnEmpty() { + // Test comparing different fields in conditions + RexInputRef nameFieldRef = rexBuilder.makeInputRef(rowType.getFieldList().get(1).getType(), 1); + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral literalName = rexBuilder.makeLiteral("John"); + RexLiteral result1 = rexBuilder.makeLiteral("result1"); + RexLiteral result2 = rexBuilder.makeLiteral("result2"); + RexLiteral elseResult = rexBuilder.makeLiteral("else"); + + RexCall condition1 = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); // age >= 18 + RexCall condition2 = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.EQUALS, nameFieldRef, literalName); // name = 'John' + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(condition1, result1, condition2, result2, elseResult)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertFalse(result.isPresent()); + } + + @Test + void testAnalyzeWithAndConditionShouldReturnEmpty() { + // Test AND condition which should be unsupported + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("working_age"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("other"); + + RexCall condition1 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + RexCall condition2 = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, 
fieldRef, literal65); + RexCall andCondition = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.AND, condition1, condition2); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(andCondition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertFalse(result.isPresent()); + } + + @Test + void testAnalyzeWithOrConditionShouldReturnEmpty() { + // Test OR condition which should be unsupported + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("age_group"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("other"); + + RexCall condition1 = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, fieldRef, literal18); + RexCall condition2 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal65); + RexCall orCondition = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.OR, condition1, condition2); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(orCondition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertFalse(result.isPresent()); + } + + @Test + void testAnalyzeWithUnsupportedComparison() { + // Test GREATER_THAN which should be converted to supported operations or fail + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("minor"); + + RexCall condition = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.GREATER_THAN, fieldRef, literal18); // This should fail + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertFalse(result.isPresent()); + } + + @Test + void testAnalyzeWithReversedComparison() { + // Test literal on left side: 18 <= age (should be converted to age >= 18) + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("minor"); + + RexCall condition = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.LESS_THAN_OR_EQUAL, literal18, fieldRef); // 18 <= age + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("reversed_test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "reversed_test" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "minor", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeWithNullLiteralValue() { + // Test with null literal value that can't be converted to Double + RexLiteral nullLiteral = 
+ rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.INTEGER)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("result"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("else"); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, nullLiteral); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertFalse(result.isPresent()); + } + + @Test + void testSimpleCaseGeneratesExpectedDSL() { + // CASE WHEN age >= 18 THEN 'adult' ELSE 'minor' END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral minorLiteral = rexBuilder.makeLiteral("minor"); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, adultLiteral, minorLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_groups", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "age_groups" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "minor", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testMultipleConditionsGenerateExpectedDSL() { + // CASE + // WHEN age >= 65 THEN 'senior' + // WHEN age >= 18 THEN 'adult' + // ELSE 'minor' + // END + + RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral seniorLiteral = rexBuilder.makeLiteral("senior"); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral minorLiteral = rexBuilder.makeLiteral("minor"); + + RexCall condition1 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal65); + RexCall condition2 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(condition1, seniorLiteral, condition2, adultLiteral, minorLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_categories", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "age_categories" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "senior", + "from" : 65.0 + }, + { + "key" : "adult", + "from" : 18.0, + "to" : 65.0 + }, + { + "key" : "minor", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testLessThanConditionGeneratesExpectedDSL() { + // CASE WHEN age < 21 THEN 'underage' ELSE 'legal' END + + RexLiteral literal21 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(21)); + RexLiteral underageLiteral = rexBuilder.makeLiteral("underage"); + RexLiteral legalLiteral = 
rexBuilder.makeLiteral("legal"); + + RexCall condition = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, fieldRef, literal21); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, underageLiteral, legalLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("legal_status", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "legal_status" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "underage", + "to" : 21.0 + }, + { + "key" : "legal", + "from" : 21.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testNullElseClauseGeneratesExpectedDSL() { + // CASE WHEN age >= 18 THEN 'adult' ELSE NULL END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral nullLiteral = + rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.VARCHAR)); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, adultLiteral, nullLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("adult_check", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "adult_check" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "null", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testSearchConditionGeneratesExpectedDSL() { + // Create a SEARCH condition (Sarg-based range): 18 <= age < 65 + TreeRangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(18), BigDecimal.valueOf(65))); + + Sarg sarg = Sarg.of(RexUnknownAs.UNKNOWN, rangeSet); + RexNode sargLiteral = + rexBuilder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + + RexCall searchCall = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.SEARCH, Arrays.asList(fieldRef, sargLiteral)); + + RexLiteral workingLiteral = rexBuilder.makeLiteral("working_age"); + RexLiteral otherLiteral = rexBuilder.makeLiteral("other"); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(searchCall, workingLiteral, otherLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("employment_status", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "employment_status" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "working_age", + "from" : 18.0, + "to" : 65.0 + }, + { + "key" : "other", + "to" : 18.0 + }, + { + "key" : "other", + "from" : 65.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testSearchWithDiscontinuousRanges() { + // age >= 20 && age < 30 -> '20-30' + // age >= 40 && age <50 -> '40-50' + // Create 
discontinuous ranges: [20, 30) and [40, 50) + TreeRangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(20), BigDecimal.valueOf(30))); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(40), BigDecimal.valueOf(50))); + + Sarg sarg = Sarg.of(RexUnknownAs.UNKNOWN, rangeSet); + RexNode sargLiteral = + rexBuilder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + + RexCall searchCall = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.SEARCH, Arrays.asList(fieldRef, sargLiteral)); + + RexLiteral targetLiteral = rexBuilder.makeLiteral("target_age"); + RexLiteral otherLiteral = + rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.VARCHAR)); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(searchCall, targetLiteral, otherLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("discontinuous_ranges", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "discontinuous_ranges" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "target_age", + "from" : 20.0, + "to" : 30.0 + }, + { + "key" : "target_age", + "from" : 40.0, + "to" : 50.0 + }, + { + "key" : "null", + "to" : 20.0 + }, + { + "key" : "null", + "from" : 30.0, + "to" : 40.0 + }, + { + "key" : "null", + "from" : 50.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + /** + * Helper method to normalize JSON strings for comparison by removing extra whitespace and + * ensuring consistent formatting. + */ + private String normalizeJson(String json) { + return json.replaceAll("\\s+", " ").replaceAll("\\s*([{}\\[\\],:]?)\\s*", "$1").trim(); + } + + @Test + void testAnalyzeSearchConditionWithInvalidField() { + // Create a SEARCH condition with non-field reference + TreeRangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(18), BigDecimal.valueOf(65))); + + Sarg sarg = Sarg.of(RexUnknownAs.UNKNOWN, rangeSet); + RexNode sargLiteral = + rexBuilder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + RexLiteral constantLiteral = rexBuilder.makeExactLiteral(BigDecimal.valueOf(42)); + + RexCall searchCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.SEARCH, + Arrays.asList(constantLiteral, sargLiteral)); // constant instead of field + + RexLiteral resultLiteral = rexBuilder.makeLiteral("result"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("else"); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(searchCall, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertFalse(result.isPresent()); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 06cc0b82fd7..ca5cc712ae9 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -74,7 +74,7 @@ import 
org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; import org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder; @@ -802,8 +802,7 @@ private Runnable withAggregationPushedDown( AggregationBuilders.avg(aggregation.aggregateName).field(aggregation.aggregateBy)) .size(AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE); List aggBuilders = Collections.singletonList(aggBuilder); - responseParser = - new CompositeAggregationParser(new SingleValueParser(aggregation.aggregateName)); + responseParser = new BucketAggregationParser(new SingleValueParser(aggregation.aggregateName)); return () -> { verify(requestBuilder, times(1)).pushDownAggregation(Pair.of(aggBuilders, responseParser));