diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index e79c15e588..f701756559 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -61,6 +61,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -769,6 +770,11 @@ public LogicalPlan visitSpath(SPath node, AnalysisContext context) { throw getOnlyForCalciteException("Spath"); } + @Override + public LogicalPlan visitChart(Chart node, AnalysisContext context) { + throw getOnlyForCalciteException("Chart"); + } + @Override public LogicalPlan visitTimechart(Timechart node, AnalysisContext context) { throw getOnlyForCalciteException("Timechart"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 0dd475c561..320723fd57 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -49,6 +49,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -275,6 +276,10 @@ public T visitReverse(Reverse node, C context) { return visitChildren(node, context); } + public T visitChart(Chart node, C context) { + return visitChildren(node, context); + } + public T visitTimechart(Timechart node, C context) { return visitChildren(node, context); } diff --git 
a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java new file mode 100644 index 0000000000..d0f982edce --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +/** AST node represent chart command. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@AllArgsConstructor +@lombok.Builder(toBuilder = true) +public class Chart extends UnresolvedPlan { + public static final Literal DEFAULT_USE_OTHER = Literal.TRUE; + public static final Literal DEFAULT_OTHER_STR = AstDSL.stringLiteral("OTHER"); + public static final Literal DEFAULT_LIMIT = AstDSL.intLiteral(10); + public static final Literal DEFAULT_USE_NULL = Literal.TRUE; + public static final Literal DEFAULT_NULL_STR = AstDSL.stringLiteral("NULL"); + public static final Literal DEFAULT_TOP = Literal.TRUE; + + private UnresolvedPlan child; + private UnresolvedExpression rowSplit; + private UnresolvedExpression columnSplit; + private UnresolvedExpression aggregationFunction; + private List arguments; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return this.child == null ? 
ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitChart(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 573a51de2a..0512a17246 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -66,6 +66,7 @@ import org.apache.calcite.sql.type.MapSqlType; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilder.AggCall; import org.apache.calcite.util.Holder; @@ -102,6 +103,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -921,7 +923,8 @@ private boolean isCountField(RexCall call) { private Pair, List> aggregateWithTrimming( List groupExprList, List aggExprList, - CalcitePlanContext context) { + CalcitePlanContext context, + boolean hintBucketNonNull) { Pair, List> resolved = resolveAttributesForAggregation(groupExprList, aggExprList, context); List resolvedGroupByList = resolved.getLeft(); @@ -1025,6 +1028,7 @@ private Pair, List> aggregateWithTrimming( List intendedGroupKeyAliases = getGroupKeyNamesAfterAggregation(reResolved.getLeft()); context.relBuilder.aggregate( context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight()); + if (hintBucketNonNull) addIgnoreNullBucketHintToAggregate(context); // During aggregation, Calcite projects both input dependencies and output 
group-by fields. // When names conflict, Calcite adds numeric suffixes (e.g., "value0"). // Apply explicit renaming to restore the intended aliases. @@ -1093,6 +1097,19 @@ private Pair, List> resolveAttributesForAggregation( @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { + visitAggregation(node, context, true); + return context.relBuilder.peek(); + } + + /** + * Visits an aggregation node and builds the corresponding Calcite RelNode. + * + * @param node the aggregation node containing group expressions and aggregation functions + * @param context the Calcite plan context for building RelNodes + * @param aggFirst if true, aggregation results (metrics) appear first in output schema (agg, + * group-by fields); if false, group expressions appear first (group-by fields, agg). + */ + private void visitAggregation(Aggregation node, CalcitePlanContext context, boolean aggFirst) { visitChildren(node, context); List aggExprList = node.getAggExprList(); @@ -1134,14 +1151,9 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { } Pair, List> aggregationAttributes = - aggregateWithTrimming(groupExprList, aggExprList, context); - if (toAddHintsOnAggregate) { - addIgnoreNullBucketHintToAggregate(context); - } + aggregateWithTrimming(groupExprList, aggExprList, context, toAddHintsOnAggregate); // schema reordering - // As an example, in command `stats count() by colA, colB`, - // the sequence of output schema is "count, colA, colB". 
List outputFields = context.relBuilder.fields(); int numOfOutputFields = outputFields.size(); int numOfAggList = aggExprList.size(); @@ -1149,8 +1161,6 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { // Add aggregation results first List aggRexList = outputFields.subList(numOfOutputFields - numOfAggList, numOfOutputFields); - reordered.addAll(aggRexList); - // Add group by columns List aliasedGroupByList = aggregationAttributes.getLeft().stream() .map(this::extractAliasLiteral) @@ -1159,10 +1169,17 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { .map(context.relBuilder::field) .map(f -> (RexNode) f) .toList(); - reordered.addAll(aliasedGroupByList); + if (aggFirst) { + // As an example, in command `stats count() by colA, colB`, + // the sequence of output schema is "count, colA, colB". + reordered.addAll(aggRexList); + // Add group by columns + reordered.addAll(aliasedGroupByList); + } else { + reordered.addAll(aliasedGroupByList); + reordered.addAll(aggRexList); + } context.relBuilder.project(reordered); - - return context.relBuilder.peek(); } private Optional getTimeSpanField(UnresolvedExpression expr) { @@ -2210,11 +2227,7 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) { .map(context.relBuilder::isNotNull) .toList()); } - aggregateWithTrimming(groupExprList, aggExprList, context); - - if (toAddHintsOnAggregate) { - addIgnoreNullBucketHintToAggregate(context); - } + aggregateWithTrimming(groupExprList, aggExprList, context, toAddHintsOnAggregate); // 2. 
add count() column with sort direction List partitionKeys = rexVisitor.analyze(node.getGroupExprList(), context); @@ -2338,7 +2351,7 @@ public RelNode visitFlatten(Flatten node, CalcitePlanContext context) { } /** Helper method to get the function name for proper column naming */ - private String getValueFunctionName(UnresolvedExpression aggregateFunction) { + private String getAggFieldAlias(UnresolvedExpression aggregateFunction) { if (aggregateFunction instanceof Alias) { return ((Alias) aggregateFunction).getName(); } @@ -2375,6 +2388,155 @@ private String getValueFunctionName(UnresolvedExpression aggregateFunction) { return sb.toString(); } + @Override + public RelNode visitChart(Chart node, CalcitePlanContext context) { + visitChildren(node, context); + ArgumentMap argMap = ArgumentMap.of(node.getArguments()); + List groupExprList = + Stream.of(node.getRowSplit(), node.getColumnSplit()).filter(Objects::nonNull).toList(); + ChartConfig config = ChartConfig.fromArguments(argMap); + Aggregation aggregation = + new Aggregation( + List.of(node.getAggregationFunction()), + List.of(), + groupExprList, + null, + List.of(new Argument(Argument.BUCKET_NULLABLE, AstDSL.booleanLiteral(config.useNull)))); + visitAggregation(aggregation, context, false); + RelBuilder relBuilder = context.relBuilder; + String columnSplitName = + relBuilder.peek().getRowType().getFieldNames().size() > 2 + ? 
relBuilder.peek().getRowType().getFieldNames().get(1) + : null; + + // If row or column split does not present or limit equals 0, this is the same as `stats agg + // [group by col]` because all truncating is performed on the column split + Integer limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); + if (node.getRowSplit() == null || node.getColumnSplit() == null || Objects.equals(limit, 0)) { + // The output of chart is expected to be ordered by row split names + relBuilder.sort(relBuilder.field(0)); + return relBuilder.peek(); + } + + String aggFunctionName = getAggFunctionName(node.getAggregationFunction()); + BuiltinFunctionName aggFunction = + BuiltinFunctionName.of(aggFunctionName) + .orElseThrow( + () -> + new IllegalArgumentException( + StringUtils.format( + "Unrecognized aggregation function: %s", aggFunctionName))); + + // Convert the column split to string if necessary: column split was supposed to be pivoted to + // column names. This guarantees that its type compatibility with useother and usenull + RexNode colSplit = relBuilder.field(1); + String columSplitName = relBuilder.peek().getRowType().getFieldNames().get(1); + if (!SqlTypeUtil.isCharacter(colSplit.getType())) { + colSplit = + relBuilder.alias( + context.rexBuilder.makeCast( + UserDefinedFunctionUtils.NULLABLE_STRING, colSplit, true, true), + columSplitName); + } + relBuilder.project(relBuilder.field(0), colSplit, relBuilder.field(2)); + RelNode aggregated = relBuilder.peek(); + + // 1: column-split, 2: agg + relBuilder.project(relBuilder.field(1), relBuilder.field(2)); + // Make sure that rows who don't have a column split not interfere grand total calculation + relBuilder.filter(relBuilder.isNotNull(relBuilder.field(0))); + final String GRAND_TOTAL_COL = "__grand_total__"; + relBuilder.aggregate( + relBuilder.groupKey(relBuilder.field(0)), + buildAggCall(context.relBuilder, aggFunction, relBuilder.field(1)) + .as(GRAND_TOTAL_COL)); // results: group key, agg calls 
+ RexNode grandTotal = relBuilder.field(GRAND_TOTAL_COL); + // Apply sorting: for MIN/EARLIEST, reverse the top/bottom logic + boolean smallestFirst = + aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST; + if (config.top != smallestFirst) { + grandTotal = relBuilder.desc(grandTotal); + } + + // Always set it to null last so that it does not interfere with top / bottom calculation + grandTotal = relBuilder.nullsLast(grandTotal); + RexNode rowNum = + PlanUtils.makeOver( + context, + BuiltinFunctionName.ROW_NUMBER, + relBuilder.literal(1), // dummy expression for row number calculation + List.of(), + List.of(), + List.of(grandTotal), + WindowFrame.toCurrentRow()); + relBuilder.projectPlus(relBuilder.alias(rowNum, PlanUtils.ROW_NUMBER_COLUMN_FOR_CHART)); + RelNode ranked = relBuilder.build(); + + relBuilder.push(aggregated); + relBuilder.push(ranked); + + // on column-split = group key + relBuilder.join( + JoinRelType.LEFT, relBuilder.equals(relBuilder.field(2, 0, 1), relBuilder.field(2, 1, 0))); + + RexNode colSplitPostJoin = relBuilder.field(1); + RexNode lteCondition = + relBuilder.call( + SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + relBuilder.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_CHART), + relBuilder.literal(limit)); + if (!config.useOther) { + relBuilder.filter(lteCondition); + } + RexNode nullCondition = relBuilder.isNull(colSplitPostJoin); + + RexNode columnSplitExpr; + if (config.useNull) { + columnSplitExpr = + relBuilder.call( + SqlStdOperatorTable.CASE, + nullCondition, + relBuilder.literal(config.nullStr), + lteCondition, + relBuilder.field(1), // col split + relBuilder.literal(config.otherStr)); + } else { + columnSplitExpr = + relBuilder.call( + SqlStdOperatorTable.CASE, + lteCondition, + relBuilder.field(1), + relBuilder.literal(config.otherStr)); + } + + String aggFieldName = relBuilder.peek().getRowType().getFieldNames().get(2); + relBuilder.project( + relBuilder.field(0), + relBuilder.alias(columnSplitExpr, 
columnSplitName), + relBuilder.field(2)); + relBuilder.aggregate( + relBuilder.groupKey(relBuilder.field(0), relBuilder.field(1)), + buildAggCall(context.relBuilder, aggFunction, relBuilder.field(2)).as(aggFieldName)); + // The output of chart is expected to be ordered by row and column split names + relBuilder.sort(relBuilder.field(0), relBuilder.field(1)); + return relBuilder.peek(); + } + + private record ChartConfig( + int limit, boolean top, boolean useOther, boolean useNull, String otherStr, String nullStr) { + static ChartConfig fromArguments(ArgumentMap argMap) { + int limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); + boolean top = (Boolean) argMap.getOrDefault("top", Chart.DEFAULT_TOP).getValue(); + boolean useOther = + (Boolean) argMap.getOrDefault("useother", Chart.DEFAULT_USE_OTHER).getValue(); + boolean useNull = (Boolean) argMap.getOrDefault("usenull", Chart.DEFAULT_USE_NULL).getValue(); + String otherStr = + (String) argMap.getOrDefault("otherstr", Chart.DEFAULT_OTHER_STR).getValue(); + String nullStr = (String) argMap.getOrDefault("nullstr", Chart.DEFAULT_NULL_STR).getValue(); + return new ChartConfig(limit, top, useOther, useNull, otherStr, nullStr); + } + } + /** Transforms timechart command into SQL-based operations. 
*/ @Override public RelNode visitTimechart( @@ -2384,11 +2546,11 @@ public RelNode visitTimechart( // Extract parameters UnresolvedExpression spanExpr = node.getBinExpression(); - List groupExprList = Arrays.asList(spanExpr); + List groupExprList; // Handle no by field case if (node.getByField() == null) { - String valueFunctionName = getValueFunctionName(node.getAggregateFunction()); + String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction()); // Create group expression list with just the timestamp span but use a different alias // to avoid @timestamp naming conflict @@ -2396,7 +2558,7 @@ public RelNode visitTimechart( simpleGroupExprList.add(new Alias("timestamp", spanExpr)); // Create agg expression list with the aggregate function List simpleAggExprList = - List.of(new Alias(valueFunctionName, node.getAggregateFunction())); + List.of(new Alias(aggFieldAlias, node.getAggregateFunction())); // Create an Aggregation object Aggregation aggregation = new Aggregation( @@ -2411,9 +2573,9 @@ public RelNode visitTimechart( context.relBuilder.push(result); // Reorder fields: timestamp first, then count context.relBuilder.project( - context.relBuilder.field("timestamp"), context.relBuilder.field(valueFunctionName)); + context.relBuilder.field("timestamp"), context.relBuilder.field(aggFieldAlias)); // Rename timestamp to @timestamp - context.relBuilder.rename(List.of("@timestamp", valueFunctionName)); + context.relBuilder.rename(List.of("@timestamp", aggFieldAlias)); context.relBuilder.sort(context.relBuilder.field(0)); return context.relBuilder.peek(); @@ -2422,7 +2584,7 @@ public RelNode visitTimechart( // Extract parameters for byField case UnresolvedExpression byField = node.getByField(); String byFieldName = ((Field) byField).getField().toString(); - String valueFunctionName = getValueFunctionName(node.getAggregateFunction()); + String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction()); int limit = Optional.ofNullable(node.getLimit()).orElse(10); 
boolean useOther = Optional.ofNullable(node.getUseOther()).orElse(true); @@ -2430,7 +2592,7 @@ public RelNode visitTimechart( try { // Step 1: Initial aggregation - IMPORTANT: order is [spanExpr, byField] groupExprList = Arrays.asList(spanExpr, byField); - aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context); + aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context, false); // First rename the timestamp field (2nd to last) to @timestamp List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); @@ -2449,11 +2611,11 @@ public RelNode visitTimechart( // Handle no limit case - just sort and return with proper field aliases if (limit == 0) { - // Add final projection with proper aliases: [@timestamp, byField, valueFunctionName] + // Add final projection with proper aliases: [@timestamp, byField, aggFieldAlias] context.relBuilder.project( context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), context.relBuilder.alias(context.relBuilder.field(1), byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); context.relBuilder.sort(context.relBuilder.field(0), context.relBuilder.field(1)); return context.relBuilder.peek(); } @@ -2463,36 +2625,67 @@ public RelNode visitTimechart( // Step 2: Find top N categories using window function approach (more efficient than separate // aggregation) - RelNode topCategories = buildTopCategoriesQuery(completeResults, limit, context); + String aggFunctionName = getAggFunctionName(node.getAggregateFunction()); + Optional aggFuncNameOptional = BuiltinFunctionName.of(aggFunctionName); + if (aggFuncNameOptional.isEmpty()) { + throw new IllegalArgumentException( + StringUtils.format("Unrecognized aggregation function: %s", aggFunctionName)); + } + BuiltinFunctionName aggFunction = aggFuncNameOptional.get(); + RelNode topCategories = 
buildTopCategoriesQuery(completeResults, limit, aggFunction, context); // Step 3: Apply OTHER logic with single pass return buildFinalResultWithOther( - completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context); + completeResults, + topCategories, + byFieldName, + aggFunction, + aggFieldAlias, + useOther, + limit, + context); } catch (Exception e) { throw new RuntimeException("Error in visitTimechart: " + e.getMessage(), e); } } + private String getAggFunctionName(UnresolvedExpression aggregateFunction) { + if (aggregateFunction instanceof Alias alias) { + return getAggFunctionName(alias.getDelegated()); + } + return ((AggregateFunction) aggregateFunction).getFuncName(); + } + /** Build top categories query - simpler approach that works better with OTHER handling */ private RelNode buildTopCategoriesQuery( - RelNode completeResults, int limit, CalcitePlanContext context) { + RelNode completeResults, + int limit, + BuiltinFunctionName aggFunction, + CalcitePlanContext context) { context.relBuilder.push(completeResults); // Filter out null values when determining top categories - null should not count towards limit context.relBuilder.filter(context.relBuilder.isNotNull(context.relBuilder.field(1))); // Get totals for non-null categories - field positions: 0=@timestamp, 1=byField, 2=value + RexInputRef valueField = context.relBuilder.field(2); + AggCall call = buildAggCall(context.relBuilder, aggFunction, valueField); + context.relBuilder.aggregate( - context.relBuilder.groupKey(context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as("grand_total")); + context.relBuilder.groupKey(context.relBuilder.field(1)), call.as("grand_total")); // Apply sorting and limit to non-null categories only - context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field("grand_total"))); + RexNode sortField = context.relBuilder.field("grand_total"); + // For MIN and EARLIEST, top results should be the minimum ones 
+ sortField = + aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST + ? sortField + : context.relBuilder.desc(sortField); + context.relBuilder.sort(sortField); if (limit > 0) { context.relBuilder.limit(0, limit); } - return context.relBuilder.build(); } @@ -2501,18 +2694,25 @@ private RelNode buildFinalResultWithOther( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + BuiltinFunctionName aggFunction, + String aggFieldAlias, boolean useOther, int limit, CalcitePlanContext context) { // Use zero-filling for count aggregations, standard result for others - if (valueFunctionName.equals("count")) { + if (aggFieldAlias.equals("count")) { return buildZeroFilledResult( - completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context); + completeResults, topCategories, byFieldName, aggFieldAlias, useOther, limit, context); } else { return buildStandardResult( - completeResults, topCategories, byFieldName, valueFunctionName, useOther, context); + completeResults, + topCategories, + byFieldName, + aggFunction, + aggFieldAlias, + useOther, + context); } } @@ -2521,7 +2721,8 @@ private RelNode buildStandardResult( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + BuiltinFunctionName aggFunctionName, + String aggFieldAlias, boolean useOther, CalcitePlanContext context) { @@ -2544,11 +2745,13 @@ private RelNode buildStandardResult( context.relBuilder.project( context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), context.relBuilder.alias(categoryExpr, byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); + RexInputRef valueField = context.relBuilder.field(2); + AggCall aggCall = buildAggCall(context.relBuilder, aggFunctionName, valueField); context.relBuilder.aggregate( 
context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as(valueFunctionName)); + aggCall.as(aggFieldAlias)); applyFiltersAndSort(useOther, context); return context.relBuilder.peek(); @@ -2583,7 +2786,7 @@ private RelNode buildZeroFilledResult( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + String aggFieldAlias, boolean useOther, int limit, CalcitePlanContext context) { @@ -2622,7 +2825,7 @@ private RelNode buildZeroFilledResult( context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), "@timestamp"), context.relBuilder.alias(context.relBuilder.field(1), byFieldName), - context.relBuilder.alias(context.relBuilder.literal(0), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.literal(0), aggFieldAlias)); RelNode zeroFilledCombinations = context.relBuilder.build(); // Get actual results with OTHER logic applied @@ -2644,7 +2847,7 @@ private RelNode buildZeroFilledResult( context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), "@timestamp"), context.relBuilder.alias(actualCategoryExpr, byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); context.relBuilder.aggregate( context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), @@ -2659,12 +2862,30 @@ private RelNode buildZeroFilledResult( // Aggregate to combine actual and zero-filled data context.relBuilder.aggregate( context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as(valueFunctionName)); + context.relBuilder.sum(context.relBuilder.field(2)).as(aggFieldAlias)); applyFiltersAndSort(useOther, context); return context.relBuilder.peek(); } + /** + * Aggregate a field based on a given built-in aggregation 
function name. + * + *

It is intended for secondary aggregations in timechart and chart commands. Using it + * elsewhere may lead to unintended results. It handles explicitly only MIN, MAX, AVG, COUNT, + * DISTINCT_COUNT, EARLIEST, and LATEST. It sums the results for the rest aggregation types, + * assuming them to be accumulative. + */ + private AggCall buildAggCall( + RelBuilder relBuilder, BuiltinFunctionName aggFunction, RexNode node) { + return switch (aggFunction) { + case MIN, EARLIEST -> relBuilder.min(node); + case MAX, LATEST -> relBuilder.max(node); + case AVG -> relBuilder.avg(node); + default -> relBuilder.sum(node); + }; + } + @Override public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index fefab6d57c..ded7ba541a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -66,6 +66,7 @@ public interface PlanUtils { String ROW_NUMBER_COLUMN_FOR_MAIN = "_row_number_main_"; String ROW_NUMBER_COLUMN_FOR_SUBSEARCH = "_row_number_subsearch_"; String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__"; + String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_"; static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { diff --git a/docs/category.json b/docs/category.json index 7ebe643373..f126904da6 100644 --- a/docs/category.json +++ b/docs/category.json @@ -50,6 +50,7 @@ "user/ppl/cmd/streamstats.rst", "user/ppl/cmd/subquery.rst", "user/ppl/cmd/syntax.rst", + "user/ppl/cmd/chart.rst", "user/ppl/cmd/timechart.rst", "user/ppl/cmd/search.rst", "user/ppl/functions/statistical.rst", diff --git a/docs/user/ppl/cmd/chart.rst b/docs/user/ppl/cmd/chart.rst new file mode 100644 index 0000000000..5824ef704b --- /dev/null +++ b/docs/user/ppl/cmd/chart.rst @@ -0,0 
+1,211 @@ +===== +chart +===== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +=========== + +The ``chart`` command transforms search results by applying a statistical aggregation function and optionally grouping the data by one or two fields. The results are suitable for visualization as a two-dimension chart when grouping by two fields, where unique values in the second group key can be pivoted to column names. + +Version +======= +3.4.0 + +Syntax +====== + +.. code-block:: text + + chart + [limit=(top|bottom) ] [useother=] [usenull=] [nullstr=] [otherstr=] + + [ by ] | [over ] [ by ] + +**Parameters:** + +* **limit**: optional. Specifies the number of distinct values to display when using column split. + + * Default: top10 + * Syntax: ``limit=(top|bottom)`` or ``limit=`` (defaults to top) + * When ``limit=K`` is set, the top or bottom K distinct column split values are retained; the additional values are grouped into an "OTHER" category if ``useother`` is not set to false. + * Set limit to 0 to show all distinct values without any limit. + * Use ``limit=topK`` or ``limit=bottomK`` to specify whether to retain the top or bottom K column categories. The ranking is based on the aggregated values for each distinct column-split value. For example, ``chart limit=top3 count() by a b`` retains the 3 most common b categories; ``chart limit=top5 min(value) by a b`` selects the 5 b categories that contains smallest aggregated values. If not specified, top is used by default. + * Only applies when column split presents (by 2 fields or over...by... coexists). + +* **useother**: optional. Controls whether to create an "OTHER" category for distinct column-split values beyond the limit. + + * Default: true + * When set to false, only the top/bottom N distinct column-split values (based on limit) are shown without an "OTHER" category. + * When set to true, distinct values beyond the limit are grouped into an "OTHER" category. 
+ * Only applies when using column split and when there are more distinct column-split values than the limit. + +* **usenull**: optional. Controls whether to group events without a column split (i.e. whose column split is null) into a separate "NULL" category. + + * Default: true + * When ``usenull=false``, events with a null column split are excluded from results. + * When ``usenull=true``, events with a null column split are grouped into a separate "NULL" category. + * ``usenull`` only applies to column split. Null values in the row split are handled in the same way as normal aggregations. + +* **nullstr**: optional. Specifies the category name for rows that do not contain the column split value. + + * Default: "NULL" + * Only applies when ``usenull`` is set to true. + +* **otherstr**: optional. Specifies the category name for the "OTHER" category. + + * Default: "OTHER" + * Only applies when ``useother`` is set to true and there are values beyond the limit. + +* **aggregation_function**: mandatory. The aggregation function to apply to the data. + + * Currently, only a single aggregation function is supported. + * Available functions: aggregation functions supported by the `stats `_ command. + +* **by**: optional. Groups the results by either one field (row split) or two fields (row split and column split) + + * ``limit``, ``useother``, and ``usenull`` apply to the column split + * Results are returned as individual rows for each combination. + * If not specified, the aggregation is performed across all documents. + +* **over...by...**: optional. Alternative syntax for grouping by multiple fields. + + * ``over by `` groups the results by both fields. + * Using ``over`` alone on one field is equivalent to ``by `` + +Notes +===== + +* The fields generated by column splitting are converted to strings so that they are compatible with ``nullstr`` and ``otherstr`` and can be used as column names once pivoted. 
+* The aggregation metric appears as the last column in the result. Result columns are ordered as: [row-split] [column-split] [aggregation-metrics] + +Examples +======== + +Example 1: Basic aggregation without grouping +--------------------------------------------- + +This example calculates the average balance across all accounts. + +PPL query:: + + os> source=accounts | chart avg(balance) + fetched rows / total rows = 1/1 + +--------------+ + | avg(balance) | + |--------------| + | 20482.25 | + +--------------+ + +Example 2: Group by single field +-------------------------------- + +This example calculates the count of accounts grouped by gender. + +PPL query:: + + os> source=accounts | chart count() by gender + fetched rows / total rows = 2/2 + +--------+---------+ + | gender | count() | + |--------+---------| + | F | 1 | + | M | 3 | + +--------+---------+ + +Example 3: Using over and by for multiple field grouping +-------------------------------------------------------- + +This example shows average balance grouped by both gender and age fields. Note that the age column in the result is converted to string type. + +PPL query:: + + os> source=accounts | chart avg(balance) over gender by age + fetched rows / total rows = 4/4 + +--------+-----+--------------+ + | gender | age | avg(balance) | + |--------+-----+--------------| + | F | 28 | 32838.0 | + | M | 32 | 39225.0 | + | M | 33 | 4180.0 | + | M | 36 | 5686.0 | + +--------+-----+--------------+ + +Example 4: Using basic limit functionality +------------------------------------------ + +This example limits the results to show only the top 1 age group. Note that the age column in the result is converted to string type. 
+ +PPL query:: + + os> source=accounts | chart limit=1 count() over gender by age + fetched rows / total rows = 3/3 + +--------+-------+---------+ + | gender | age | count() | + |--------+-------+---------| + | M | OTHER | 2 | + | M | 33 | 1 | + | F | OTHER | 1 | + +--------+-------+---------+ + +Example 5: Using limit with other parameters +-------------------------------------------- + +This example shows using limit with useother and custom otherstr parameters. + +PPL query:: + + os> source=accounts | chart limit=top1 useother=true otherstr='minor_gender' count() over state by gender + fetched rows / total rows = 4/4 + +-------+--------------+---------+ + | state | gender | count() | + |-------+--------------+---------| + | TN | M | 1 | + | MD | M | 1 | + | VA | minor_gender | 1 | + | IL | M | 1 | + +-------+--------------+---------+ + +Example 6: Using null parameters +-------------------------------- + +This example shows using limit with usenull and custom nullstr parameters. + +PPL query:: + + os> source=accounts | chart usenull=true nullstr='employer not specified' count() over firstname by employer + fetched rows / total rows = 4/4 + +-----------+------------------------+---------+ + | firstname | employer | count() | + |-----------+------------------------+---------| + | Nanette | Quility | 1 | + | Amber | Pyrami | 1 | + | Dale | employer not specified | 1 | + | Hattie | Netagy | 1 | + +-----------+------------------------+---------+ + +Example 7: Using chart command with span +---------------------------------------- + +This example demonstrates using span for grouping age ranges. 
+ +PPL query:: + + os> source=accounts | chart max(balance) by age span=10, gender + fetched rows / total rows = 2/2 + +-----+--------+--------------+ + | age | gender | max(balance) | + |-----+--------+--------------| + | 30 | M | 39225 | + | 20 | F | 32838 | + +-----+--------+--------------+ + +Limitations +=========== +* Only a single aggregation function is supported per chart command. diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 697ec7e2c6..04a3182757 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -119,6 +119,8 @@ The query start with search command and then flowing a set of command delimited - `reverse command `_ - `table command `_ + + - `chart command <cmd/chart.rst>`_ - `timechart command `_ diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java new file mode 100644 index 0000000000..46b067ac9d --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java @@ -0,0 +1,317 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; +import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import 
org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteChartCommandIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK); + loadIndex(Index.BANK_WITH_NULL_VALUES); + loadIndex(Index.OTELLOGS); + loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.EVENTS_NULL); + } + + @Test + public void testChartWithSingleGroupKey() throws IOException { + JSONObject result1 = + executeQuery(String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK)); + verifySchema(result1, schema("gender", "string"), schema("avg(balance)", "double")); + verifyDataRows(result1, rows("F", 40488), rows("M", 16377.25)); + JSONObject result2 = + executeQuery(String.format("source=%s | chart avg(balance) over gender", TEST_INDEX_BANK)); + assertJsonEquals(result1.toString(), result2.toString()); + } + + @Test + public void testChartWithMultipleGroupKeys() throws IOException { + JSONObject result1 = + executeQuery( + String.format("source=%s | chart avg(balance) over gender by age", TEST_INDEX_BANK)); + verifySchema( + result1, + schema("gender", "string"), + schema("age", "string"), + schema("avg(balance)", "double")); + verifyDataRows( + result1, + rows("F", "28", 32838), + rows("F", "39", 40540), + rows("M", "32", 39225), + rows("M", "33", 4180), + rows("M", "36", 11052), + rows("F", "34", 48086)); + JSONObject result2 = + executeQuery( + String.format("source=%s | chart avg(balance) by gender, age", TEST_INDEX_BANK)); + assertJsonEquals(result1.toString(), result2.toString()); + } + + @Test + public void testChartCombineOverByWithLimit0() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=0 avg(balance) over state by gender", TEST_INDEX_BANK)); + verifySchema( + result, + schema("state", "string"), + schema("gender", "string"), + schema("avg(balance)", "double")); + verifyDataRows( + result, + rows("IL", "M", 39225.0), + rows("IN", "F", 
48086.0), + rows("MD", "M", 4180.0), + rows("PA", "F", 40540.0), + rows("TN", "M", 5686.0), + rows("VA", "F", 32838.0), + rows("WA", "M", 16418.0)); + } + + @Test + public void testChartMaxBalanceByAgeSpan() throws IOException { + JSONObject result = + executeQuery( + String.format("source=%s | chart max(balance) by age span=10", TEST_INDEX_BANK)); + verifySchema(result, schema("age", "int"), schema("max(balance)", "bigint")); + verifyDataRows(result, rows(20, 32838), rows(30, 48086)); + } + + @Test + public void testChartMaxValueOverTimestampSpanWeekByCategory() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) over timestamp span=1week by category", + TEST_INDEX_TIME_DATA)); + verifySchema( + result, + schema("timestamp", "timestamp"), + schema("category", "string"), + schema("max(value)", "int")); + // Data spans from 2025-07-28 to 2025-08-01, all within same week + verifyDataRows( + result, + rows("2025-07-28 00:00:00", "A", 9367), + rows("2025-07-28 00:00:00", "B", 9521), + rows("2025-07-28 00:00:00", "C", 9187), + rows("2025-07-28 00:00:00", "D", 8736)); + } + + @Test + public void testChartMaxValueOverCategoryByTimestampSpanWeek() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) over category by timestamp span=1week", + TEST_INDEX_TIME_DATA)); + verifySchema( + result, + schema("category", "string"), + schema("timestamp", "string"), + schema("max(value)", "int")); + // All data within same week span + verifyDataRows( + result, + rows("A", "2025-07-28 00:00:00", 9367), + rows("B", "2025-07-28 00:00:00", 9521), + rows("C", "2025-07-28 00:00:00", 9187), + rows("D", "2025-07-28 00:00:00", 8736)); + } + + @Test + public void testChartMaxValueByTimestampSpanDayAndWeek() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) by timestamp span=1day, @timestamp span=2weeks", + TEST_INDEX_TIME_DATA)); 
+ // column split are converted to string in order to be compatible with nullstr and otherstr + verifySchema( + result, + schema("timestamp", "timestamp"), + schema("@timestamp", "string"), + schema("max(value)", "int")); + // Data grouped by day spans + verifyDataRows( + result, + rows("2025-07-28 00:00:00", "2025-07-28 00:00:00", 9367), + rows("2025-07-29 00:00:00", "2025-07-28 00:00:00", 9521), + rows("2025-07-30 00:00:00", "2025-07-28 00:00:00", 9234), + rows("2025-07-31 00:00:00", "2025-07-28 00:00:00", 9318), + rows("2025-08-01 00:00:00", "2025-07-28 00:00:00", 9015)); + } + + @Test + public void testChartLimit0WithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=0 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); + verifyDataRows( + result, + rows(0, "DEBUG", 5), + rows(0, "DEBUG2", 6), + rows(0, "DEBUG3", 7), + rows(0, "DEBUG4", 8), + rows(0, "ERROR", 17), + rows(0, "ERROR2", 18), + rows(0, "ERROR3", 19), + rows(0, "ERROR4", 20), + rows(0, "FATAL", 21), + rows(0, "FATAL2", 22), + rows(0, "FATAL3", 23), + rows(0, "FATAL4", 24), + rows(0, "INFO", 9), + rows(0, "INFO2", 10), + rows(0, "INFO3", 11), + rows(0, "INFO4", 12), + rows(0, "TRACE2", 2), + rows(0, "TRACE3", 3), + rows(0, "TRACE4", 4), + rows(0, "WARN", 13), + rows(0, "WARN2", 14), + rows(0, "WARN3", 15), + rows(0, "WARN4", 16), + rows(1, "ERROR", 17), + rows(1, "INFO", 9), + rows(1, "TRACE", 1)); + } + + @Test + public void testChartLimitTopWithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=top2 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", 
"bigint"), + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); + verifyDataRows( + result, + rows(1, "max_among_other", 17), + rows(0, "max_among_other", 22), + rows(0, "FATAL3", 23), + rows(0, "FATAL4", 24)); + } + + @Test + public void testChartLimitBottomWithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=bottom2 useother=false otherstr='other_small_not_shown'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); + verifyDataRows(result, rows(1, "TRACE", 1), rows(0, "TRACE2", 2)); + } + + @Test + public void testChartLimitTopWithMinAgg() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=top2 min(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("min(severityNumber)", "bigint")); + verifyDataRows( + result, + rows(1, "OTHER", 9), + rows(1, "TRACE", 1), + rows(0, "OTHER", 3), + rows(0, "TRACE2", 2)); + } + + @Test + public void testChartUseNullTrueWithNullStr() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart nullstr='nil' avg(balance) over gender by age span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema( + result, + schema("gender", "string"), + schema("age", "string"), + schema("avg(balance)", "double")); + verifyDataRows( + result, + rows("M", "30", 21702.5), + rows("F", "30", 48086.0), + rows("F", "20", 32838.0), + rows("F", "nil", null)); + } + + @Test + public void testChartWithNullAndLimit() throws IOException { + JSONObject result = + executeQuery("source=events_null | chart limit=3 count() over @timestamp span=1d by host"); + + verifySchema( + result, + schema("@timestamp", 
"timestamp"), + schema("host", "string"), + schema("count()", "bigint")); + + verifyDataRows( + result, + rows("2024-07-01 00:00:00", "db-01", 1), + rows("2024-07-01 00:00:00", "web-01", 2), + rows("2024-07-01 00:00:00", "web-02", 2), + rows("2024-07-01 00:00:00", "NULL", 1)); + } + + @Test + public void testChartUseNullFalseWithNullStr() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart usenull=false nullstr='not_shown' count() over gender by age" + + " span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema( + result, schema("gender", "string"), schema("age", "string"), schema("count()", "bigint")); + verifyDataRows(result, rows("M", "30", 4), rows("F", "30", 1), rows("F", "20", 1)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 77f3a45cc0..f975bcdc2f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -7,8 +7,10 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; @@ -31,6 +33,7 @@ public void init() throws Exception { enableCalcite(); setQueryBucketSize(1000); loadIndex(Index.BANK_WITH_STRING_VALUES); + 
loadIndex(Index.BANK_WITH_NULL_VALUES); loadIndex(Index.NESTED_SIMPLE); loadIndex(Index.TIME_TEST_DATA); loadIndex(Index.TIME_TEST_DATA2); @@ -1403,6 +1406,65 @@ public void testPushDownMinOrMaxAggOnDerivedField() throws IOException { TEST_INDEX_ACCOUNT))); } + @Test + public void testExplainChartWithSingleGroupKey() throws IOException { + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_chart_single_group_key.yaml"), + explainQueryYaml( + String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_chart_with_span.yaml"), + explainQueryYaml( + String.format("source=%s | chart max(balance) by age span=10", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_chart_timestamp_span.yaml"), + explainQueryYaml( + String.format( + "source=%s | chart max(value) over timestamp span=1week by category", + TEST_INDEX_TIME_DATA))); + } + + @Test + public void testExplainChartWithMultipleGroupKeys() throws IOException { + String expected = loadExpectedPlan("explain_chart_multiple_group_keys.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format("source=%s | chart avg(balance) over gender by age", TEST_INDEX_BANK))); + } + + @Test + public void testExplainChartWithLimits() throws IOException { + String expected = loadExpectedPlan("explain_chart_with_limit.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | chart limit=0 avg(balance) over state by gender", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_chart_use_other.yaml"), + explainQueryYaml( + String.format( + "source=%s | chart limit=2 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS))); + } + + @Test + public void testExplainChartWithNullStr() throws IOException { + String expected = loadExpectedPlan("explain_chart_null_str.yaml"); + 
assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | chart limit=10 usenull=true nullstr='nil' avg(balance) over gender by" + + " age span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES))); + } + @Test public void testCasePushdownAsRangeQueryExplain() throws IOException { // CASE 1: Range - Metric diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java index 3b4ca27dab..4d9352e9e8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java @@ -183,27 +183,13 @@ public void testTimechartWithLimit() throws IOException { schema("host", "string"), schema("avg(cpu_usage)", "double")); - // Verify we have rows for web-01, web-02, and OTHER - boolean foundWeb01 = false; - boolean foundWeb02 = false; - boolean foundOther = false; - - for (int i = 0; i < result.getJSONArray("datarows").length(); i++) { - Object[] row = result.getJSONArray("datarows").getJSONArray(i).toList().toArray(); - String label = (String) row[1]; - - if ("web-01".equals(label)) { - foundWeb01 = true; - } else if ("web-02".equals(label)) { - foundWeb02 = true; - } else if ("OTHER".equals(label)) { - foundOther = true; - } - } - - assertTrue("web-01 not found in results", foundWeb01); - assertTrue("web-02 not found in results", foundWeb02); - assertTrue("OTHER category not found in results", foundOther); + verifyDataRows( + result, + rows("2024-07-01 00:00:00", "web-01", 45.2), + rows("2024-07-01 00:01:00", "OTHER", 38.7), + rows("2024-07-01 00:02:00", "web-01", 55.3), + rows("2024-07-01 00:03:00", "db-01", 42.1), + rows("2024-07-01 00:04:00", "OTHER", 41.8)); } @Test @@ -383,7 +369,7 @@ public void testTimechartWithLimitAndUseOther() throws IOException { if ("OTHER".equals(host)) { foundOther = true; 
- assertEquals(330.4, cpuUsage, 0.1); + assertEquals(41.3, cpuUsage, 0.1); } else if ("web-03".equals(host)) { foundWeb03 = true; assertEquals(55.3, cpuUsage, 0.1); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml new file mode 100644 index 0000000000..d619f0b89f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml @@ -0,0 +1,35 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + 
EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[SAFE_CAST($t1)], gender=[$t0], age=[$t3], avg(balance)=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[SAFE_CAST($t0)], expr#3=[IS NOT NULL($t2)], $f0=[$t2], avg(balance)=[$t1], $condition=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 
1},avg(balance)=AVG($2)), PROJECT->[age, avg(balance)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml new file mode 100644 index 0000000000..2fa1a05e51 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml @@ -0,0 +1,39 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'nil', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, 
null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[gender, balance, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","balance","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], 
age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], expr#11=[IS NOT NULL($t4)], age=[$t4], avg(balance)=[$t10], $condition=[$t11]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[gender, balance, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","balance","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml new file mode 100644 index 0000000000..8ec7422de3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), SORT->[0], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml new file mode 100644 index 0000000000..1233817e40 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml @@ -0,0 +1,29 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + LogicalProject(timestamp=[$0], category=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], max(value)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(timestamp=[$1], category=[$0], max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + LogicalProject(category=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[MAX($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(category=[$0], max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + 
EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], timestamp=[$t0], category=[$t10], max(value)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[timestamp0, category, max(value)], SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($1), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},__grand_total__=MAX($1))], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"category","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"__grand_total__":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml new file mode 100644 index 0000000000..7916a31a5d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml @@ -0,0 +1,28 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$0], severityText=[CASE(IS NULL($1), 'NULL', <=($5, 2), $1, 'max_among_other')], max(severityNumber)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalProject(severityText=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[MAX($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(severityText=[$1], max(severityNumber)=[$2]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + physical: | + 
EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], flags=[$t0], severityText=[$t10], max(severityNumber)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},max(severityNumber)=MAX($2)), SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"flags":{"terms":{"field":"flags","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"max(severityNumber)":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], severityText=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->IS NOT NULL($7), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},__grand_total__=MAX($2))], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"severityText","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"__grand_total__":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml new file mode 100644 index 0000000000..0b4ea1ad9c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml new file mode 100644 index 0000000000..73039fe4ed --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalProject(age=[$0], max(balance)=[$1]) + LogicalAggregate(group=[{1}], max(balance)=[MAX($0)]) + LogicalProject(balance=[$7], age0=[SPAN($10, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},max(balance)=MAX($0)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age0":{"histogram":{"field":"age","missing_bucket":true,"missing_order":"last","order":"asc","interval":10.0}}}]},"aggregations":{"max(balance)":{"max":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml index f212b4c8bf..a315860aac 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) + LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[AVG($2)]) LogicalProject(@timestamp=[$0], 
host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) @@ -10,7 +10,7 @@ calcite: LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) + LogicalAggregate(group=[{1}], grand_total=[AVG($2)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) @@ -19,19 +19,21 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) - EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) - EnumerableSort(sort0=[$1], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableLimit(fetch=[10]) - 
EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{0}], grand_total=[SUM($1)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(cpu_usage)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[10]) + EnumerableSort(sort0=[$1], dir0=[DESC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], host=[$t0], grand_total=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml new file mode 100644 index 0000000000..c9a7377f62 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml @@ -0,0 +1,37 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + 
LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], expr#11=[IS NOT NULL($t4)], age=[$t4], avg(balance)=[$t10], $condition=[$t11]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_groups.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_groups.yaml new file mode 100644 index 0000000000..df3fd8391d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_groups.yaml @@ -0,0 +1,37 @@ +logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT], sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) + LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) + LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + +physical: | + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + 
EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml new file mode 100644 index 0000000000..72d29bd369 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml @@ -0,0 +1,39 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'nil', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + 
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[10], expr#14=[null:NULL], expr#15=[SPAN($t5, $t13, $t14)], gender=[$t4], balance=[$t3], age0=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, 
$t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], expr#11=[IS NOT NULL($t4)], age=[$t4], avg(balance)=[$t10], $condition=[$t11]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[10], expr#14=[null:NULL], expr#15=[SPAN($t5, $t13, $t14)], gender=[$t4], balance=[$t3], age0=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group.yaml new file mode 100644 index 0000000000..208fdf9993 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group.yaml @@ -0,0 +1,15 @@ +logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT], sort0=[$0], dir0=[ASC]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalProject(avg(balance)=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + +physical: | + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(balance)=[$t8], gender=[$t0]) + EnumerableAggregate(group=[{4}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml new file mode 100644 index 0000000000..9370110fa0 --- /dev/null +++ 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], gender=[$t0], avg(balance)=[$t8]) + EnumerableAggregate(group=[{4}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml new file mode 100644 index 0000000000..e0826fddce --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml @@ -0,0 +1,35 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + LogicalProject(timestamp=[$0], category=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], max(value)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(timestamp=[$1], category=[$0], max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + 
LogicalProject(category=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[MAX($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(category=[$0], max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], timestamp=[$t0], category=[$t10], max(value)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], timestamp=[$t1], category=[$t0], max(value)=[$t2]) + EnumerableAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[1], expr#11=['w'], expr#12=[SPAN($t3, $t10, $t11)], category=[$t1], value=[$t2], timestamp0=[$t12]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{1}], __grand_total__=[MAX($2)]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[IS NOT NULL($t1)], proj#0..9=[{exprs}], $condition=[$t10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml new file mode 100644 index 0000000000..8e36c39627 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml @@ -0,0 +1,34 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$0], severityText=[CASE(IS NULL($1), 'NULL', <=($5, 2), $1, 'max_among_other')], max(severityNumber)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalProject(severityText=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[MAX($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(severityText=[$1], max(severityNumber)=[$2]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], flags=[$t0], severityText=[$t10], max(severityNumber)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + 
EnumerableCalc(expr#0..2=[{inputs}], flags=[$t1], severityText=[$t0], max(severityNumber)=[$t2]) + EnumerableAggregate(group=[{7, 23}], max(severityNumber)=[MAX($163)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], severityText=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[MAX($2)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t0)], proj#0..2=[{exprs}], $condition=[$t3]) + EnumerableAggregate(group=[{7, 23}], max(severityNumber)=[MAX($163)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml new file mode 100644 index 0000000000..b51f7589cf --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], state=[$t1], gender=[$t0], avg(balance)=[$t9]) + EnumerableAggregate(group=[{4, 9}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml new file mode 100644 index 0000000000..77129b1530 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalProject(age=[$0], max(balance)=[$1]) + LogicalAggregate(group=[{1}], max(balance)=[MAX($0)]) + LogicalProject(balance=[$7], age0=[SPAN($10, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableAggregate(group=[{1}], max(balance)=[MAX($0)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[10], expr#20=[null:NULL], expr#21=[SPAN($t10, $t19, $t20)], balance=[$t7], age0=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml index ae966d7eea..5aa55ca656 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) + LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[AVG($2)]) LogicalProject(@timestamp=[$0], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), 
null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) @@ -10,7 +10,7 @@ calcite: LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) + LogicalAggregate(group=[{1}], grand_total=[AVG($2)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) @@ -19,19 +19,21 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) - EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) - EnumerableSort(sort0=[$1], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) - EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{0}], grand_total=[SUM($2)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], expr#9=[IS NOT NULL($t0)], proj#0..1=[{exprs}], $f2=[$t8], 
$condition=[$t9]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(cpu_usage)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[10]) + EnumerableSort(sort0=[$1], dir0=[DESC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], host=[$t0], grand_total=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], expr#9=[IS NOT NULL($t0)], proj#0..1=[{exprs}], $f2=[$t8], $condition=[$t9]) + EnumerableAggregate(group=[{0, 2}], 
agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 511122fa28..070c031eee 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -46,6 +46,7 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; TRENDLINE: 'TRENDLINE'; +CHART: 'CHART'; TIMECHART: 'TIMECHART'; APPENDCOL: 'APPENDCOL'; EXPAND: 'EXPAND'; @@ -77,6 +78,7 @@ RIGHT_HINT: 'HINT.RIGHT'; // COMMAND ASSIST KEYWORDS AS: 'AS'; BY: 'BY'; +OVER: 'OVER'; SOURCE: 'SOURCE'; INDEX: 'INDEX'; A: 'A'; @@ -93,6 +95,8 @@ COST: 'COST'; EXTENDED: 'EXTENDED'; OVERRIDE: 'OVERRIDE'; OVERWRITE: 'OVERWRITE'; +TOP_K: 'TOP'[0-9]+; +BOTTOM_K: 'BOTTOM'[0-9]+; // SORT FIELD KEYWORDS // TODO #3180: Fix broken sort functionality @@ -138,6 +142,8 @@ COUNTFIELD: 'COUNTFIELD'; SHOWCOUNT: 'SHOWCOUNT'; LIMIT: 'LIMIT'; USEOTHER: 'USEOTHER'; +OTHERSTR: 'OTHERSTR'; +NULLSTR: 'NULLSTR'; INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 6b98fac02d..1e8a862120 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -77,6 +77,7 @@ commands | flattenCommand | reverseCommand | regexCommand + | chartCommand | timechartCommand | rexCommand | replaceCommand @@ -287,6 +288,28 @@ reverseCommand : REVERSE ; +chartCommand + : CHART chartOptions* statsAggTerm (OVER rowSplit)? (BY columnSplit)? + | CHART chartOptions* statsAggTerm BY rowSplit (COMMA)? 
columnSplit + ; + +chartOptions + : LIMIT EQUAL integerLiteral + | LIMIT EQUAL (TOP_K | BOTTOM_K) + | USEOTHER EQUAL booleanLiteral + | OTHERSTR EQUAL stringLiteral + | USENULL EQUAL booleanLiteral + | NULLSTR EQUAL stringLiteral + ; + +rowSplit + : fieldExpression binOption* + ; + +columnSplit + : fieldExpression binOption* + ; + timechartCommand : TIMECHART timechartParameter* statsFunction (BY fieldExpression)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 6532322916..09e9b4c77e 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -42,6 +42,7 @@ import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.TerminalNode; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.EmptySourcePropagateVisitor; import org.opensearch.sql.ast.dsl.AstDSL; @@ -72,6 +73,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.DefaultBin; @@ -427,16 +429,7 @@ private ReplacePair buildReplacePair(OpenSearchPPLParser.ReplacePairContext ctx) /** Stats command. */ @Override public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { - ImmutableList.Builder aggListBuilder = new ImmutableList.Builder<>(); - for (OpenSearchPPLParser.StatsAggTermContext aggCtx : ctx.statsAggTerm()) { - UnresolvedExpression aggExpression = internalVisitExpression(aggCtx.statsFunction()); - String name = - aggCtx.alias == null - ? 
getTextInQuery(aggCtx) - : StringUtils.unquoteIdentifier(aggCtx.alias.getText()); - Alias alias = new Alias(name, aggExpression); - aggListBuilder.add(alias); - } + List aggregations = parseAggTerms(ctx.statsAggTerm()); List groupList = Optional.ofNullable(ctx.statsByClause()) @@ -461,7 +454,7 @@ public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { Aggregation aggregation = new Aggregation( - aggListBuilder.build(), + aggregations, Collections.emptyList(), groupList, span, @@ -609,60 +602,39 @@ public UnresolvedPlan visitBinCommand(BinCommandContext ctx) { UnresolvedExpression aligntime = null; UnresolvedExpression start = null; UnresolvedExpression end = null; - + String errorFormat = "Duplicate %s parameter in bin command"; // Process each bin option: detect duplicates and assign values in one shot for (OpenSearchPPLParser.BinOptionContext option : ctx.binOption()) { + UnresolvedExpression resolvedOption = internalVisitExpression(option); // SPAN parameter if (option.span != null) { - if (!seenParams.add("SPAN")) { - throw new IllegalArgumentException("Duplicate SPAN parameter in bin command"); - } - span = internalVisitExpression(option.span); + checkParamDuplication(seenParams, option.SPAN(), errorFormat); + span = resolvedOption; } - // BINS parameter if (option.bins != null) { - if (!seenParams.add("BINS")) { - throw new IllegalArgumentException("Duplicate BINS parameter in bin command"); - } - bins = Integer.parseInt(option.bins.getText()); + checkParamDuplication(seenParams, option.BINS(), errorFormat); + bins = (Integer) ((Literal) resolvedOption).getValue(); } - // MINSPAN parameter if (option.minspan != null) { - if (!seenParams.add("MINSPAN")) { - throw new IllegalArgumentException("Duplicate MINSPAN parameter in bin command"); - } - minspan = internalVisitExpression(option.minspan); + checkParamDuplication(seenParams, option.MINSPAN(), errorFormat); + minspan = resolvedOption; } - // ALIGNTIME parameter if (option.aligntime != null) { - 
if (!seenParams.add("ALIGNTIME")) { - throw new IllegalArgumentException("Duplicate ALIGNTIME parameter in bin command"); - } - aligntime = - option.aligntime.EARLIEST() != null - ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("earliest") - : option.aligntime.LATEST() != null - ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("latest") - : internalVisitExpression(option.aligntime.literalValue()); + checkParamDuplication(seenParams, option.ALIGNTIME(), errorFormat); + aligntime = resolvedOption; } - // START parameter if (option.start != null) { - if (!seenParams.add("START")) { - throw new IllegalArgumentException("Duplicate START parameter in bin command"); - } - start = internalVisitExpression(option.start); + checkParamDuplication(seenParams, option.START(), errorFormat); + start = resolvedOption; } - // END parameter if (option.end != null) { - if (!seenParams.add("END")) { - throw new IllegalArgumentException("Duplicate END parameter in bin command"); - } - end = internalVisitExpression(option.end); + checkParamDuplication(seenParams, option.END(), errorFormat); + end = resolvedOption; } } @@ -691,6 +663,14 @@ public UnresolvedPlan visitBinCommand(BinCommandContext ctx) { } } + private void checkParamDuplication( + Set seenParams, TerminalNode terminalNode, String errorFormat) { + String paramName = terminalNode.getText(); + if (!seenParams.add(paramName)) { + throw new IllegalArgumentException(StringUtils.format(errorFormat, paramName)); + } + } + /** Sort command. */ @Override public UnresolvedPlan visitSortCommand(SortCommandContext ctx) { @@ -728,6 +708,38 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont return new Reverse(); } + /** Chart command. */ + @Override + public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext ctx) { + UnresolvedExpression rowSplit = + ctx.rowSplit() == null ? 
null : internalVisitExpression(ctx.rowSplit()); + UnresolvedExpression columnSplit = + ctx.columnSplit() == null ? null : internalVisitExpression(ctx.columnSplit()); + List arguments = ArgumentFactory.getArgumentList(ctx); + UnresolvedExpression aggFunction = parseAggTerms(List.of(ctx.statsAggTerm())).getFirst(); + return Chart.builder() + .rowSplit(rowSplit) + .columnSplit(columnSplit) + .aggregationFunction(aggFunction) + .arguments(arguments) + .build(); + } + + private List parseAggTerms( + List statsAggTermContexts) { + ImmutableList.Builder aggListBuilder = new ImmutableList.Builder<>(); + for (OpenSearchPPLParser.StatsAggTermContext aggCtx : statsAggTermContexts) { + UnresolvedExpression aggExpression = internalVisitExpression(aggCtx.statsFunction()); + String name = + aggCtx.alias == null + ? getTextInQuery(aggCtx) + : StringUtils.unquoteIdentifier(aggCtx.alias.getText()); + Alias alias = new Alias(name, aggExpression); + aggListBuilder.add(alias); + } + return aggListBuilder.build(); + } + /** Timechart command. 
*/ @Override public UnresolvedPlan visitTimechartCommand(OpenSearchPPLParser.TimechartCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 4a5230d356..5dc1bf44d8 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -1030,4 +1030,58 @@ public UnresolvedExpression visitTimeModifierExpression( : SearchComparison.Operator.LESS_OR_EQUAL; return new SearchComparison(implicitTimestampField, operator, osDateMathLiteral); } + + @Override + public UnresolvedExpression visitBinOption(OpenSearchPPLParser.BinOptionContext ctx) { + UnresolvedExpression option; + if (ctx.span != null) { + option = visit(ctx.span); + } else if (ctx.bins != null) { + option = visit(ctx.bins); + } else if (ctx.minspan != null) { + option = visit(ctx.minspan); + } else if (ctx.aligntime != null) { + option = + ctx.aligntime.EARLIEST() != null + ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("earliest") + : ctx.aligntime.LATEST() != null + ? 
org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("latest") + : visit(ctx.aligntime.literalValue()); + } else if (ctx.start != null) { + option = visit(ctx.start); + } else if (ctx.end != null) { + option = visit(ctx.end); + } else { + throw new SyntaxCheckException(StringUtils.format("Unknown bin option: %s", ctx.getText())); + } + return option; + } + + @Override + public UnresolvedExpression visitRowSplit(OpenSearchPPLParser.RowSplitContext ctx) { + // TODO: options ignored for now + Field field = (Field) visit(ctx.fieldExpression()); + for (var option : ctx.binOption()) { + if (option.span != null) { + return AstDSL.alias( + field.getField().toString(), + AstDSL.spanFromSpanLengthLiteral(field, (Literal) visit(option.binSpanValue()))); + } + } + return AstDSL.alias(ctx.fieldExpression().getText(), field); + } + + @Override + public UnresolvedExpression visitColumnSplit(OpenSearchPPLParser.ColumnSplitContext ctx) { + Field field = (Field) visit(ctx.fieldExpression()); + for (var option : ctx.binOption()) { + if (option.span != null) { + return AstDSL.alias( + field.getField().toString(), + AstDSL.spanFromSpanLengthLiteral(field, (Literal) visit(option.binSpanValue()))); + } + } + // TODO: options ignored for now + return AstDSL.alias(ctx.fieldExpression().getText(), field); + } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index acf204e803..41e9e91535 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Optional; import org.antlr.v4.runtime.ParserRuleContext; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; @@ -21,6 +22,7 @@ import 
org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.BooleanLiteralContext; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.ChartCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DecimalLiteralContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DefaultSortFieldContext; @@ -199,6 +201,37 @@ private static Argument getTypeArgument(OpenSearchPPLParser.SortFieldExpressionC } } + public static List getArgumentList(ChartCommandContext ctx) { + List arguments = new ArrayList<>(); + for (var optionCtx : ctx.chartOptions()) { + if (optionCtx.LIMIT() != null) { + Literal limit; + if (optionCtx.integerLiteral() != null) { + limit = getArgumentValue(optionCtx.integerLiteral()); + } else { + limit = + AstDSL.intLiteral( + Integer.parseInt( + (optionCtx.TOP_K() != null ? optionCtx.TOP_K() : optionCtx.BOTTOM_K()) + .getText() + .replaceAll("[^0-9-]", ""))); + } + arguments.add(new Argument("limit", limit)); + // not specified | top presents -> true; bottom presents -> false + arguments.add(new Argument("top", AstDSL.booleanLiteral(optionCtx.BOTTOM_K() == null))); + } else if (optionCtx.USEOTHER() != null) { + arguments.add(new Argument("useother", getArgumentValue(optionCtx.booleanLiteral()))); + } else if (optionCtx.OTHERSTR() != null) { + arguments.add(new Argument("otherstr", getArgumentValue(optionCtx.stringLiteral()))); + } else if (optionCtx.USENULL() != null) { + arguments.add(new Argument("usenull", getArgumentValue(optionCtx.booleanLiteral()))); + } else if (optionCtx.NULLSTR() != null) { + arguments.add(new Argument("nullstr", getArgumentValue(optionCtx.stringLiteral()))); + } + } + return arguments; + } + /** * Get list of {@link Argument}. 
* diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 5b599ae162..0971924295 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -56,6 +56,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.DefaultBin; @@ -531,6 +532,42 @@ public String visitTimechart(Timechart node, String context) { return StringUtils.format("%s%s", child, timechartCommand.toString()); } + @Override + public String visitChart(Chart node, String context) { + String child = node.getChild().get(0).accept(this, context); + StringBuilder chartCommand = new StringBuilder(); + chartCommand.append(" | chart"); + + for (Argument arg : node.getArguments()) { + String argName = arg.getArgName(); + // Skip the auto-generated "top" parameter that's added when limit is specified + if ("top".equals(argName)) { + continue; + } + if ("limit".equals(argName) || "useother".equals(argName) || "usenull".equals(argName)) { + chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); + } else if ("otherstr".equals(argName) || "nullstr".equals(argName)) { + chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); + } + } + + chartCommand.append(" ").append(visitExpression(node.getAggregationFunction())); + + if (node.getRowSplit() != null && node.getColumnSplit() != null) { + chartCommand + .append(" by ") + .append(visitExpression(node.getRowSplit())) + .append(" ") + .append(visitExpression(node.getColumnSplit())); + } else if (node.getRowSplit() != null) { + chartCommand.append(" by 
").append(visitExpression(node.getRowSplit())); + } else if (node.getColumnSplit() != null) { + chartCommand.append(" by ").append(visitExpression(node.getColumnSplit())); + } + + return StringUtils.format("%s%s", child, chartCommand.toString()); + } + public String visitRex(Rex node, String context) { String child = node.getChild().get(0).accept(this, context); String field = visitExpression(node.getField()); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java new file mode 100644 index 0000000000..2619fce64b --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java @@ -0,0 +1,384 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import 
org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; +import org.opensearch.sql.ppl.parser.AstBuilder; + +public class CalcitePPLChartTest extends CalcitePPLAbstractTest { + + public CalcitePPLChartTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Add events table for chart tests - similar to bank data used in integration tests + ImmutableList rows = + ImmutableList.of( + new Object[] {32838, "F", 28, "VA", java.sql.Timestamp.valueOf("2024-07-01 00:00:00")}, + new Object[] {40540, "F", 39, "PA", java.sql.Timestamp.valueOf("2024-07-01 00:01:00")}, + new Object[] {39225, "M", 32, "IL", java.sql.Timestamp.valueOf("2024-07-01 00:02:00")}, + new Object[] {4180, "M", 33, "MD", java.sql.Timestamp.valueOf("2024-07-01 00:03:00")}, + new Object[] {11052, "M", 36, "WA", java.sql.Timestamp.valueOf("2024-07-01 00:04:00")}, + new Object[] {48086, "F", 34, "IN", java.sql.Timestamp.valueOf("2024-07-01 00:05:00")}); + schema.add("bank", new BankTable(rows)); + + // Add time_data table for span tests + ImmutableList timeRows = + ImmutableList.of( + new Object[] {java.sql.Timestamp.valueOf("2025-07-28 00:00:00"), "A", 9367}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-29 00:00:00"), "B", 9521}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-30 00:00:00"), "C", 9187}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-31 00:00:00"), "D", 8736}, + new Object[] {java.sql.Timestamp.valueOf("2025-08-01 00:00:00"), "A", 9015}); + schema.add("time_data", new TimeDataTable(timeRows)); + + return Frameworks.newConfigBuilder() + 
.parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testChartWithSingleGroupKey() { + String ppl = "source=bank | chart avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithOverSyntax() { + String ppl = "source=bank | chart avg(balance) over gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithMultipleGroupKeys() { + String ppl = "source=bank | chart avg(balance) over gender by age"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN" + + " `t7`.`_row_number_chart_` <= 10 THEN `t1`.`age` ELSE 'OTHER' END `age`," + + " AVG(`t1`.`avg(balance)`) `avg(balance)`\n" + + "FROM (SELECT `gender`, SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`)" + + " `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`, `age`) `t1`\n" + + "LEFT JOIN (SELECT `age`, AVG(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY AVG(`avg(balance)`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`, `age`) `t4`\n" + + "WHERE `age` IS NOT NULL\n" + + "GROUP BY `age`) `t7` ON `t1`.`age` = `t7`.`age`\n" + + "GROUP BY `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN" + + " `t7`.`_row_number_chart_` <= 10 THEN `t1`.`age` ELSE 'OTHER' END"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + 
@Test + public void testChartWithMultipleGroupKeysAlternativeSyntax() { + String ppl = "source=bank | chart avg(balance) by gender, age"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN" + + " `t7`.`_row_number_chart_` <= 10 THEN `t1`.`age` ELSE 'OTHER' END `age`," + + " AVG(`t1`.`avg(balance)`) `avg(balance)`\n" + + "FROM (SELECT `gender`, SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`)" + + " `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`, `age`) `t1`\n" + + "LEFT JOIN (SELECT `age`, AVG(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY AVG(`avg(balance)`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`, `age`) `t4`\n" + + "WHERE `age` IS NOT NULL\n" + + "GROUP BY `age`) `t7` ON `t1`.`age` = `t7`.`age`\n" + + "GROUP BY `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN" + + " `t7`.`_row_number_chart_` <= 10 THEN `t1`.`age` ELSE 'OTHER' END"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithLimit() { + String ppl = "source=bank | chart limit=2 avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithLimitZero() { + String ppl = "source=bank | chart limit=0 avg(balance) over state by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `state`, `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `state`, `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithSpan() { + String ppl = "source=bank | chart max(balance) by age span=10"; + + RelNode 
root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `SPAN`(`age`, 10, NULL) `age`, MAX(`balance`) `max(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `SPAN`(`age`, 10, NULL)"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithTimeSpan() { + String ppl = "source=time_data | chart max(value) over timestamp span=1week by category"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t1`.`timestamp`, CASE WHEN `t1`.`category` IS NULL THEN 'NULL' WHEN" + + " `t7`.`_row_number_chart_` <= 10 THEN `t1`.`category` ELSE 'OTHER' END `category`," + + " MAX(`t1`.`max(value)`) `max(value)`\n" + + "FROM (SELECT `SPAN`(`timestamp`, 1, 'w') `timestamp`, `category`, MAX(`value`)" + + " `max(value)`\n" + + "FROM `scott`.`time_data`\n" + + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t1`\n" + + "LEFT JOIN (SELECT `category`, MAX(`max(value)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY MAX(`max(value)`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `category`, MAX(`value`) `max(value)`\n" + + "FROM `scott`.`time_data`\n" + + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t4`\n" + + "WHERE `category` IS NOT NULL\n" + + "GROUP BY `category`) `t7` ON `t1`.`category` = `t7`.`category`\n" + + "GROUP BY `t1`.`timestamp`, CASE WHEN `t1`.`category` IS NULL THEN 'NULL' WHEN" + + " `t7`.`_row_number_chart_` <= 10 THEN `t1`.`category` ELSE 'OTHER' END"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseOtherTrue() { + String ppl = "source=bank | chart useother=true avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseOtherFalse() { + String ppl = "source=bank | chart useother=false limit=2 avg(balance) by gender"; + + RelNode 
root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithOtherStr() { + String ppl = "source=bank | chart limit=1 otherstr='other_values' avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithNullStr() { + String ppl = "source=bank | chart nullstr='null_values' avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseNull() { + String ppl = "source=bank | chart usenull=false avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + private UnresolvedPlan parsePPL(String query) { + PPLSyntaxParser parser = new PPLSyntaxParser(); + AstBuilder astBuilder = new AstBuilder(query); + return astBuilder.visit(parser.parse(query)); + } + + @RequiredArgsConstructor + public static class BankTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("balance", SqlTypeName.INTEGER) + .nullable(true) + .add("gender", SqlTypeName.VARCHAR) + .nullable(true) + .add("age", SqlTypeName.INTEGER) + .nullable(true) + .add("state", SqlTypeName.VARCHAR) + .nullable(true) + .add("timestamp", SqlTypeName.TIMESTAMP) + 
.nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } + + @RequiredArgsConstructor + public static class TimeDataTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("timestamp", SqlTypeName.TIMESTAMP) + .nullable(true) + .add("category", SqlTypeName.VARCHAR) + .nullable(true) + .add("value", SqlTypeName.INTEGER) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} diff --git 
a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java index ee6b82f2d8..c3ed1ebcee 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java @@ -260,13 +260,13 @@ public void testTimechartWithSpan1m() { RelNode root = getRelNode(ppl); String expectedSparkSql = "SELECT `t1`.`@timestamp`, CASE WHEN `t7`.`region` IS NOT NULL THEN `t1`.`region` ELSE CASE" - + " WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END `region`, SUM(`t1`.`$f2`)" + + " WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END `region`, AVG(`t1`.`$f2`)" + " `avg(cpu_usage)`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t1`\n" - + "LEFT JOIN (SELECT `region`, SUM(`$f2`) `grand_total`\n" + + "LEFT JOIN (SELECT `region`, AVG(`$f2`) `grand_total`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" @@ -297,13 +297,13 @@ public void testTimechartWithLimitAndUseOtherFalse() { RelNode root = getRelNode(ppl); String expectedSparkSql = "SELECT `t1`.`@timestamp`, CASE WHEN `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE" - + " WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `host`, SUM(`t1`.`$f2`)" + + " WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `host`, AVG(`t1`.`$f2`)" + " `avg(cpu_usage)`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t1`\n" - + "LEFT JOIN (SELECT `host`, SUM(`$f2`) `grand_total`\n" + + "LEFT JOIN (SELECT `host`, AVG(`$f2`) `grand_total`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') 
`@timestamp`, `host`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index a1823e4bef..f1464e3106 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -67,6 +67,7 @@ import org.mockito.Mockito; import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; @@ -74,6 +75,7 @@ import org.opensearch.sql.ast.expression.PatternMode; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -1488,4 +1490,78 @@ public void testReplaceCommandWithMultiplePairs() { // Test multiple pattern/replacement pairs plan("source=t | replace 'a' WITH 'A', 'b' WITH 'B' IN field"); } + + @Test + public void testChartCommandBasic() { + assertEqual( + "source=t | chart count() by age", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("age", field("age"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments(emptyList()) + .build()); + } + + @Test + public void testChartCommandWithRowSplit() { + assertEqual( + "source=t | chart count() over status by age", + Chart.builder() + .child(relation("t")) + .rowSplit(alias("status", field("status"))) + .columnSplit(alias("age", field("age"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments(emptyList()) + .build()); + } + + @Test + public void 
testChartCommandWithOptions() { + assertEqual( + "source=t | chart limit=10 useother=true count() by status", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("status", field("status"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), + argument("top", booleanLiteral(true)), + argument("useother", booleanLiteral(true)))) + .build()); + } + + @Test + public void testChartCommandWithAllOptions() { + assertEqual( + "source=t | chart limit=top5 useother=false otherstr='OTHER' usenull=true nullstr='NULL'" + + " avg(balance) by gender", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("gender", field("gender"))) + .aggregationFunction(alias("avg(balance)", aggregate("avg", field("balance")))) + .arguments( + exprList( + argument("limit", intLiteral(5)), + argument("top", booleanLiteral(true)), + argument("useother", booleanLiteral(false)), + argument("otherstr", stringLiteral("OTHER")), + argument("usenull", booleanLiteral(true)), + argument("nullstr", stringLiteral("NULL")))) + .build()); + } + + @Test + public void testChartCommandWithBottomLimit() { + assertEqual( + "source=t | chart limit=bottom3 count() by category", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("category", field("category"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments( + exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) + .build()); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 6b0e0a081f..5d10960ea6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -14,6 +14,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.allFields; 
import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.argument; +import static org.opensearch.sql.ast.dsl.AstDSL.bin; import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.caseWhen; import static org.opensearch.sql.ast.dsl.AstDSL.cast; @@ -1605,4 +1606,66 @@ public void testVisitSpanLiteral() { .useOther(true) .build()); } + + @Test + public void testBinOptionWithSpan() { + assertEqual( + "source=t | bin age span=10", + bin(relation("t"), field("age"), argument("span", intLiteral(10)))); + } + + @Test + public void testBinOptionWithBins() { + assertEqual( + "source=t | bin age bins=5", + bin(relation("t"), field("age"), argument("bins", intLiteral(5)))); + } + + @Test + public void testBinOptionWithMinspan() { + assertEqual( + "source=t | bin age minspan=100", + bin(relation("t"), field("age"), argument("minspan", intLiteral(100)))); + } + + @Test + public void testBinOptionWithAligntimeEarliest() { + assertEqual( + "source=t | bin age span=10 aligntime=earliest", + bin( + relation("t"), + field("age"), + argument("span", intLiteral(10)), + argument("aligntime", stringLiteral("earliest")))); + } + + @Test + public void testBinOptionWithAligntimeLiteralValue() { + assertEqual( + "source=t | bin age span=10 aligntime=1000", + bin( + relation("t"), + field("age"), + argument("span", intLiteral(10)), + argument("aligntime", intLiteral(1000)))); + } + + @Test + public void testBinOptionWithStartAndEnd() { + assertEqual( + "source=t | bin age bins=10 start=0 end=100", + bin( + relation("t"), + field("age"), + argument("bins", intLiteral(10)), + argument("start", intLiteral(0)), + argument("end", intLiteral(100)))); + } + + @Test + public void testBinOptionWithTimeSpan() { + assertEqual( + "source=t | bin timestamp span=1h", + bin(relation("t"), field("timestamp"), argument("span", stringLiteral("1h")))); + } } diff --git 
a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java index adb9ec719e..dc2a9d6606 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java @@ -21,7 +21,9 @@ import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import org.junit.Test; +import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ppl.parser.AstBuilderTest; public class ArgumentFactoryTest extends AstBuilderTest { @@ -100,6 +102,39 @@ public void testSortFieldArgument() { argument("type", stringLiteral("auto")))))); } + @Test + public void testChartCommandArguments() { + assertEqual( + "source=t | chart limit=5 useother=true otherstr='OTHER_VAL' usenull=false" + + " nullstr='NULL_VAL' count() by age", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("age", field("age"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments( + exprList( + argument("limit", intLiteral(5)), + argument("top", booleanLiteral(true)), + argument("useother", booleanLiteral(true)), + argument("otherstr", stringLiteral("OTHER_VAL")), + argument("usenull", booleanLiteral(false)), + argument("nullstr", stringLiteral("NULL_VAL")))) + .build()); + } + + @Test + public void testChartCommandBottomArguments() { + assertEqual( + "source=t | chart limit=bottom3 count() by status", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("status", field("status"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments( + exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) + .build()); + } + @Test public void testNoArgConstructorForArgumentFactoryShouldPass() { new ArgumentFactory(); diff --git 
a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 48f6c45b4c..ec87000b5b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -260,6 +260,34 @@ public void testTimechartCommand() { anonymize("source=t | timechart count() by host")); } + @Test + public void testChartCommand() { + assertEquals( + "source=table | chart count(identifier) by identifier identifier", + anonymize("source=t | chart count(age) by gender country")); + } + + @Test + public void testChartCommandWithParameters() { + assertEquals( + "source=table | chart limit=*** useother=*** avg(identifier) by identifier", + anonymize("source=t | chart limit=5 useother=false avg(balance) by state")); + } + + @Test + public void testChartCommandOver() { + assertEquals( + "source=table | chart avg(identifier) by identifier", + anonymize("source=t | chart avg(balance) over gender")); + } + + @Test + public void testChartCommandOverBy() { + assertEquals( + "source=table | chart sum(identifier) by identifier identifier", + anonymize("source=t | chart sum(amount) over gender by age")); + } + // todo, sort order is ignored, it doesn't impact the log analysis. @Test public void testSortCommandWithOptions() {