From 82cac9b1e121f4349ff6012a82e9fa2eccdf2c15 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 15 Oct 2025 16:55:38 +0800 Subject: [PATCH 01/23] WIP: Make poc implementation for chart command Signed-off-by: Yuanchun Shen --- .../sql/ast/AbstractNodeVisitor.java | 5 + .../org/opensearch/sql/ast/tree/Chart.java | 46 +++++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 93 ++++++++++++++++++- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 3 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 19 ++++ .../opensearch/sql/ppl/parser/AstBuilder.java | 85 ++++++++++------- .../sql/ppl/parser/AstExpressionBuilder.java | 54 +++++++++++ .../sql/ppl/utils/ArgumentFactory.java | 16 ++++ 8 files changed, 284 insertions(+), 37 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Chart.java diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index f5d2a1623b3..28d39ca39bc 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -49,6 +49,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -274,6 +275,10 @@ public T visitReverse(Reverse node, C context) { return visitChildren(node, context); } + public T visitChart(Chart node, C context) { + return visitChildren(node, context); + } + public T visitTimechart(Timechart node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java new file mode 100644 index 00000000000..5d4a036bc70 --- /dev/null +++ 
b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java @@ -0,0 +1,46 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +/** AST node represent chart command. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@AllArgsConstructor +@lombok.Builder(toBuilder = true) +public class Chart extends UnresolvedPlan { + private UnresolvedPlan child; + private UnresolvedExpression rowSplit; + private UnresolvedExpression columnSplit; + private List aggregationFunctions; + private List arguments; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return this.child == null ? 
ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitChart(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index d22dfe391c6..ac665e5e400 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -98,6 +98,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -1071,6 +1072,11 @@ private Pair, List> resolveAttributesForAggregation( @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { + return visitAggregationAndReturnProjection(node, context).getLeft(); + } + + private Pair> visitAggregationAndReturnProjection( + Aggregation node, CalcitePlanContext context) { visitChildren(node, context); List aggExprList = node.getAggExprList(); @@ -1155,7 +1161,7 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { reordered.addAll(aliasedGroupByList); context.relBuilder.project(reordered); - return context.relBuilder.peek(); + return Pair.of(context.relBuilder.peek(), reordered); } private Optional getTimeSpanField(UnresolvedExpression expr) { @@ -2007,6 +2013,90 @@ private String getValueFunctionName(UnresolvedExpression aggregateFunction) { return sb.toString(); } + @Override + public RelNode visitChart(Chart node, CalcitePlanContext context) { + visitChildren(node, context); + ArgumentMap argMap = ArgumentMap.of(node.getArguments()); + List groupExprList = new ArrayList<>(); + UnresolvedExpression span; + if 
(node.getColumnSplit() instanceof Span && node.getRowSplit() instanceof Span) { + throw new UnsupportedOperationException("It is not supported to have two span splits"); + } else if (node.getRowSplit() instanceof Span) { + if (node.getColumnSplit() != null) { + groupExprList.add(node.getColumnSplit()); + } + span = node.getRowSplit(); + } else if (node.getColumnSplit() instanceof Span) { + if (node.getRowSplit() != null) { + groupExprList.add(node.getRowSplit()); + } + span = node.getColumnSplit(); + } else { + groupExprList.addAll( + Stream.of(node.getRowSplit(), node.getColumnSplit()).filter(Objects::nonNull).toList()); + span = null; + } + Aggregation aggregation = + new Aggregation(node.getAggregationFunctions(), List.of(), groupExprList, span, List.of()); + Pair> aggregated = + visitAggregationAndReturnProjection(aggregation, context); + // If row or column split does not present or limit equals 0, this is the same as `stats agg + // [group by col]` + + Integer limit = + Optional.ofNullable(argMap.get("limit")).map(l -> (Integer) l.getValue()).orElse(10); + Boolean top = + Optional.ofNullable(argMap.get("top")).map(t -> (Boolean) t.getValue()).orElse(true); + if (node.getRowSplit() == null || node.getColumnSplit() == null || Objects.equals(limit, 0)) { + return aggregated.getLeft(); + } + List projected = aggregated.getRight(); + String columSplitName = aggregated.getLeft().getRowType().getFieldNames().getLast(); + RelBuilder relBuilder = context.relBuilder; + // 0: agg; 2: column-split + relBuilder.project(relBuilder.field(0), relBuilder.field(2)); + relBuilder.filter(relBuilder.isNotNull(relBuilder.field(1))); + // 1: column split; 0: agg + relBuilder.aggregate( + relBuilder.groupKey(relBuilder.field(1)), + relBuilder.sum(relBuilder.field(0)).as("__grand_total__")); // results: group key, agg calls + RexNode grandTotal = relBuilder.field("__grand_total__"); + if (top) { + grandTotal = relBuilder.desc(grandTotal); + } + RexNode rowNum = + PlanUtils.makeOver( 
+ context, + BuiltinFunctionName.ROW_NUMBER, + relBuilder.literal(1), + List.of(), + List.of(), + List.of(grandTotal), + WindowFrame.toCurrentRow()); + relBuilder.projectPlus(relBuilder.alias(rowNum, "__row_number__")); + RelNode ranked = relBuilder.build(); + + relBuilder.push(aggregated.getLeft()); + relBuilder.push(ranked); + + // on column-split = group key + relBuilder.join( + JoinRelType.INNER, relBuilder.equals(relBuilder.field(2, 0, 2), relBuilder.field(2, 1, 0))); + RexNode caseExpr = + relBuilder.alias( + relBuilder.call( + SqlStdOperatorTable.CASE, + relBuilder.call( + SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + relBuilder.field("__row_number__"), + relBuilder.literal(limit)), + relBuilder.field(2), + relBuilder.literal("OTHER")), + columSplitName); + relBuilder.project(relBuilder.field(0), relBuilder.field(1), caseExpr); + return relBuilder.peek(); + } + /** Transforms timechart command into SQL-based operations. */ @Override public RelNode visitTimechart( @@ -2124,7 +2214,6 @@ private RelNode buildTopCategoriesQuery( if (limit > 0) { context.relBuilder.limit(0, limit); } - return context.relBuilder.build(); } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index ba1e4960bb2..51fc5fbc8a1 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -45,6 +45,7 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; TRENDLINE: 'TRENDLINE'; +CHART: 'CHART'; TIMECHART: 'TIMECHART'; APPENDCOL: 'APPENDCOL'; EXPAND: 'EXPAND'; @@ -76,6 +77,7 @@ RIGHT_HINT: 'HINT.RIGHT'; // COMMAND ASSIST KEYWORDS AS: 'AS'; BY: 'BY'; +OVER: 'OVER'; SOURCE: 'SOURCE'; INDEX: 'INDEX'; A: 'A'; @@ -92,6 +94,7 @@ COST: 'COST'; EXTENDED: 'EXTENDED'; OVERRIDE: 'OVERRIDE'; OVERWRITE: 'OVERWRITE'; +BOTTOM: 'BOTTOM'; // SORT FIELD KEYWORDS // TODO #3180: Fix broken sort functionality diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 
35b81bbd348..162007b4c39 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -77,6 +77,7 @@ commands | flattenCommand | reverseCommand | regexCommand + | chartCommand | timechartCommand | rexCommand | replaceCommand @@ -258,6 +259,24 @@ reverseCommand : REVERSE ; +chartCommand + : CHART chartOptions* statsAggTerm (COMMA statsAggTerm)* (OVER rowSplit)? (BY columnSplit)? + | CHART chartOptions* statsAggTerm (COMMA statsAggTerm)* BY rowSplit (COMMA)? columnSplit + ; + +chartOptions + : LIMIT EQUAL (TOP | BOTTOM)? integerLiteral + | USEOTHER EQUAL booleanLiteral + ; + +rowSplit + : fieldExpression binOption* + ; + +columnSplit + : fieldExpression binOption* + ; + timechartCommand : TIMECHART timechartParameter* statsFunction (BY fieldExpression)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index f6ce4b10933..b7eda05944f 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -43,6 +43,7 @@ import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.TerminalNode; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.EmptySourcePropagateVisitor; import org.opensearch.sql.ast.dsl.AstDSL; @@ -51,6 +52,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.DefaultBin; @@ -500,60 +502,39 @@ public UnresolvedPlan visitBinCommand(BinCommandContext ctx) { UnresolvedExpression aligntime = null; UnresolvedExpression start = null; UnresolvedExpression end = null; - + 
String errorFormat = "Duplicate %s parameter in bin command"; // Process each bin option: detect duplicates and assign values in one shot for (OpenSearchPPLParser.BinOptionContext option : ctx.binOption()) { + UnresolvedExpression resolvedOption = internalVisitExpression(option); // SPAN parameter if (option.span != null) { - if (!seenParams.add("SPAN")) { - throw new IllegalArgumentException("Duplicate SPAN parameter in bin command"); - } - span = internalVisitExpression(option.span); + checkParamDuplication(seenParams, option.SPAN(), errorFormat); + span = resolvedOption; } - // BINS parameter if (option.bins != null) { - if (!seenParams.add("BINS")) { - throw new IllegalArgumentException("Duplicate BINS parameter in bin command"); - } - bins = Integer.parseInt(option.bins.getText()); + checkParamDuplication(seenParams, option.SPAN(), errorFormat); + bins = (Integer) ((Literal) resolvedOption).getValue(); } - // MINSPAN parameter if (option.minspan != null) { - if (!seenParams.add("MINSPAN")) { - throw new IllegalArgumentException("Duplicate MINSPAN parameter in bin command"); - } - minspan = internalVisitExpression(option.minspan); + checkParamDuplication(seenParams, option.MINSPAN(), errorFormat); + minspan = resolvedOption; } - // ALIGNTIME parameter if (option.aligntime != null) { - if (!seenParams.add("ALIGNTIME")) { - throw new IllegalArgumentException("Duplicate ALIGNTIME parameter in bin command"); - } - aligntime = - option.aligntime.EARLIEST() != null - ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("earliest") - : option.aligntime.LATEST() != null - ? 
org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("latest") - : internalVisitExpression(option.aligntime.literalValue()); + checkParamDuplication(seenParams, option.ALIGNTIME(), errorFormat); + aligntime = resolvedOption; } - // START parameter if (option.start != null) { - if (!seenParams.add("START")) { - throw new IllegalArgumentException("Duplicate START parameter in bin command"); - } - start = internalVisitExpression(option.start); + checkParamDuplication(seenParams, option.START(), errorFormat); + start = resolvedOption; } - // END parameter if (option.end != null) { - if (!seenParams.add("END")) { - throw new IllegalArgumentException("Duplicate END parameter in bin command"); - } - end = internalVisitExpression(option.end); + checkParamDuplication(seenParams, option.END(), errorFormat); + end = resolvedOption; } } @@ -582,6 +563,14 @@ public UnresolvedPlan visitBinCommand(BinCommandContext ctx) { } } + private void checkParamDuplication( + Set seenParams, TerminalNode terminalNode, String errorFormat) { + String paramName = terminalNode.getText(); + if (!seenParams.add(paramName)) { + throw new IllegalArgumentException(StringUtils.format(errorFormat, paramName)); + } + } + /** Sort command. */ @Override public UnresolvedPlan visitSortCommand(SortCommandContext ctx) { @@ -617,6 +606,32 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont return new Reverse(); } + /** Chart command. */ + @Override + public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext ctx) { + UnresolvedExpression rowSplit = + ctx.rowSplit() == null ? null : internalVisitExpression(ctx.rowSplit()); + UnresolvedExpression columnSplit = + ctx.columnSplit() == null ? 
null : internalVisitExpression(ctx.columnSplit()); + List arguments = ArgumentFactory.getArgumentList(ctx); + ImmutableList.Builder aggListBuilder = new ImmutableList.Builder<>(); + for (OpenSearchPPLParser.StatsAggTermContext aggCtx : ctx.statsAggTerm()) { + UnresolvedExpression aggExpression = internalVisitExpression(aggCtx.statsFunction()); + String name = + aggCtx.alias == null + ? getTextInQuery(aggCtx) + : StringUtils.unquoteIdentifier(aggCtx.alias.getText()); + Alias alias = new Alias(name, aggExpression); + aggListBuilder.add(alias); + } + return Chart.builder() + .rowSplit(rowSplit) + .columnSplit(columnSplit) + .aggregationFunctions(aggListBuilder.build()) + .arguments(arguments) + .build(); + } + /** Timechart command. */ @Override public UnresolvedPlan visitTimechartCommand(OpenSearchPPLParser.TimechartCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 9850231463f..b5030a8d37f 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -996,4 +996,58 @@ public UnresolvedExpression visitTimeModifierExpression( : SearchComparison.Operator.LESS_OR_EQUAL; return new SearchComparison(implicitTimestampField, operator, osDateMathLiteral); } + + @Override + public UnresolvedExpression visitBinOption(OpenSearchPPLParser.BinOptionContext ctx) { + UnresolvedExpression option; + if (ctx.span != null) { + option = visit(ctx.span); + } else if (ctx.bins != null) { + option = visit(ctx.bins); + } else if (ctx.minspan != null) { + option = visit(ctx.minspan); + } else if (ctx.aligntime != null) { + option = + ctx.aligntime.EARLIEST() != null + ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("earliest") + : ctx.aligntime.LATEST() != null + ? 
org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("latest") + : visit(ctx.aligntime.literalValue()); + } else if (ctx.start != null) { + option = visit(ctx.start); + } else if (ctx.end != null) { + option = visit(ctx.end); + } else { + throw new SyntaxCheckException(StringUtils.format("Unknown bin option: %s", ctx.getText())); + } + return option; + } + + @Override + public UnresolvedExpression visitRowSplit(OpenSearchPPLParser.RowSplitContext ctx) { + // TODO: options ignored for now + Field field = (Field) visit(ctx.fieldExpression()); + for (var option : ctx.binOption()) { + if (option.span != null) { + return AstDSL.alias( + field.getField().toString(), + AstDSL.spanFromSpanLengthLiteral(field, (Literal) visit(option.binSpanValue()))); + } + } + return AstDSL.alias(ctx.fieldExpression().getText(), field); + } + + @Override + public UnresolvedExpression visitColumnSplit(OpenSearchPPLParser.ColumnSplitContext ctx) { + Field field = (Field) visit(ctx.fieldExpression()); + for (var option : ctx.binOption()) { + if (option.span != null) { + return AstDSL.alias( + field.getField().toString(), + AstDSL.spanFromSpanLengthLiteral(field, (Literal) visit(option.binSpanValue()))); + } + } + // TODO: options ignored for now + return AstDSL.alias(ctx.fieldExpression().getText(), field); + } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index e1d892fdfce..7f9a42a01b5 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -10,6 +10,7 @@ import java.util.Collections; import java.util.List; import org.antlr.v4.runtime.ParserRuleContext; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; @@ -19,6 +20,7 @@ import 
org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.BooleanLiteralContext; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.ChartCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DecimalLiteralContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; @@ -146,6 +148,20 @@ public static List getArgumentList(TopCommandContext ctx) { : new Argument("showCount", new Literal(true, DataType.BOOLEAN))); } + public static List getArgumentList(ChartCommandContext ctx) { + List arguments = new ArrayList<>(); + for (var optionCtx : ctx.chartOptions()) { + if (optionCtx.LIMIT() != null) { + arguments.add(new Argument("limit", getArgumentValue(optionCtx.integerLiteral()))); + // not specified | top presents -> true; bottom presents -> false + arguments.add(new Argument("top", AstDSL.booleanLiteral(optionCtx.BOTTOM() == null))); + } else if (optionCtx.USEOTHER() != null) { + arguments.add(new Argument("useother", getArgumentValue(optionCtx.booleanLiteral()))); + } + } + return arguments; + } + /** * Get list of {@link Argument}. 
* From c5e11decfe2074020f7ec245b2ad40d6d04abdc0 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 16 Oct 2025 11:30:09 +0800 Subject: [PATCH 02/23] Support param useother and otherstr Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 39 +++++++++++++------ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../opensearch/sql/ppl/parser/AstBuilder.java | 2 +- .../sql/ppl/utils/ArgumentFactory.java | 2 + 5 files changed, 32 insertions(+), 13 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index ac665e5e400..09ee51da4a3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2047,6 +2047,10 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { Optional.ofNullable(argMap.get("limit")).map(l -> (Integer) l.getValue()).orElse(10); Boolean top = Optional.ofNullable(argMap.get("top")).map(t -> (Boolean) t.getValue()).orElse(true); + Boolean useOther = + Optional.ofNullable(argMap.get("useother")).map(u -> (Boolean) u.getValue()).orElse(true); + String otherStr = + Optional.ofNullable(argMap.get("otherstr")).map(o -> (String) o.getValue()).orElse("OTHER"); if (node.getRowSplit() == null || node.getColumnSplit() == null || Objects.equals(limit, 0)) { return aggregated.getLeft(); } @@ -2082,18 +2086,29 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // on column-split = group key relBuilder.join( JoinRelType.INNER, relBuilder.equals(relBuilder.field(2, 0, 2), relBuilder.field(2, 1, 0))); - RexNode caseExpr = - relBuilder.alias( - relBuilder.call( - SqlStdOperatorTable.CASE, - relBuilder.call( - SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - relBuilder.field("__row_number__"), - relBuilder.literal(limit)), - relBuilder.field(2), - 
relBuilder.literal("OTHER")), - columSplitName); - relBuilder.project(relBuilder.field(0), relBuilder.field(1), caseExpr); + + RexNode condition = + relBuilder.call( + SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + relBuilder.field("__row_number__"), + relBuilder.literal(limit)); + RexNode columnSplitExpr; + if (useOther) { + columnSplitExpr = + relBuilder.call( + SqlStdOperatorTable.CASE, + condition, + relBuilder.field(2), + relBuilder.literal(otherStr)); + } else { + relBuilder.filter(condition); + columnSplitExpr = relBuilder.field(2); + } + + relBuilder.project( + relBuilder.field(0), + relBuilder.field(1), + relBuilder.alias(columnSplitExpr, columSplitName)); return relBuilder.peek(); } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 51fc5fbc8a1..a6fbc6733db 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -134,6 +134,7 @@ COUNTFIELD: 'COUNTFIELD'; SHOWCOUNT: 'SHOWCOUNT'; LIMIT: 'LIMIT'; USEOTHER: 'USEOTHER'; +OTHERSTR: 'OTHERSTR'; INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 162007b4c39..d1b81989519 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -267,6 +267,7 @@ chartCommand chartOptions : LIMIT EQUAL (TOP | BOTTOM)? 
integerLiteral | USEOTHER EQUAL booleanLiteral + | OTHERSTR EQUAL stringLiteral ; rowSplit diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index b7eda05944f..8d2d5b63283 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -513,7 +513,7 @@ public UnresolvedPlan visitBinCommand(BinCommandContext ctx) { } // BINS parameter if (option.bins != null) { - checkParamDuplication(seenParams, option.SPAN(), errorFormat); + checkParamDuplication(seenParams, option.BINS(), errorFormat); bins = (Integer) ((Literal) resolvedOption).getValue(); } // MINSPAN parameter diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 7f9a42a01b5..f62b82cab40 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -157,6 +157,8 @@ public static List getArgumentList(ChartCommandContext ctx) { arguments.add(new Argument("top", AstDSL.booleanLiteral(optionCtx.BOTTOM() == null))); } else if (optionCtx.USEOTHER() != null) { arguments.add(new Argument("useother", getArgumentValue(optionCtx.booleanLiteral()))); + } else if (optionCtx.OTHERSTR() != null) { + arguments.add(new Argument("otherstr", getArgumentValue(optionCtx.stringLiteral()))); } } return arguments; From bcb6c6807406074154186761876765bdbf49dc33 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 17 Oct 2025 18:03:34 +0800 Subject: [PATCH 03/23] Support usenull and nullstr (when both row split and col split present) Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/analysis/Analyzer.java | 6 +++ .../org/opensearch/sql/ast/tree/Chart.java | 9 ++++ .../sql/calcite/CalciteRelNodeVisitor.java | 53 +++++++++++-------- 
ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 2 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 + .../sql/ppl/utils/ArgumentFactory.java | 4 ++ 6 files changed, 53 insertions(+), 23 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 9f78b245942..297998d028c 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -61,6 +61,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -764,6 +765,11 @@ public LogicalPlan visitSpath(SPath node, AnalysisContext context) { throw getOnlyForCalciteException("Spath"); } + @Override + public LogicalPlan visitChart(Chart node, AnalysisContext context) { + throw getOnlyForCalciteException("Chart"); + } + @Override public LogicalPlan visitTimechart(Timechart node, AnalysisContext context) { throw getOnlyForCalciteException("Timechart"); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java index 5d4a036bc70..02e0878e12d 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java @@ -12,7 +12,9 @@ import lombok.Getter; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.UnresolvedExpression; /** AST node represent chart command. 
*/ @@ -22,6 +24,13 @@ @AllArgsConstructor @lombok.Builder(toBuilder = true) public class Chart extends UnresolvedPlan { + public static final Literal DEFAULT_USE_OTHER = Literal.TRUE; + public static final Literal DEFAULT_OTHER_STR = AstDSL.stringLiteral("OTHER"); + public static final Literal DEFAULT_LIMIT = AstDSL.intLiteral(10); + public static final Literal DEFAULT_USE_NULL = Literal.TRUE; + public static final Literal DEFAULT_NULL_STR = AstDSL.stringLiteral("NULL"); + public static final Literal DEFAULT_TOP = Literal.TRUE; + private UnresolvedPlan child; private UnresolvedExpression rowSplit; private UnresolvedExpression columnSplit; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 09ee51da4a3..e2acb22d6b2 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2043,23 +2043,25 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // If row or column split does not present or limit equals 0, this is the same as `stats agg // [group by col]` - Integer limit = - Optional.ofNullable(argMap.get("limit")).map(l -> (Integer) l.getValue()).orElse(10); - Boolean top = - Optional.ofNullable(argMap.get("top")).map(t -> (Boolean) t.getValue()).orElse(true); - Boolean useOther = - Optional.ofNullable(argMap.get("useother")).map(u -> (Boolean) u.getValue()).orElse(true); - String otherStr = - Optional.ofNullable(argMap.get("otherstr")).map(o -> (String) o.getValue()).orElse("OTHER"); + Integer limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); if (node.getRowSplit() == null || node.getColumnSplit() == null || Objects.equals(limit, 0)) { return aggregated.getLeft(); } - List projected = aggregated.getRight(); + + Boolean top = (Boolean) argMap.getOrDefault("top", Chart.DEFAULT_TOP).getValue(); + 
Boolean useOther = + (Boolean) argMap.getOrDefault("useother", Chart.DEFAULT_USE_OTHER).getValue(); + Boolean useNull = (Boolean) argMap.getOrDefault("usenull", Chart.DEFAULT_USE_NULL).getValue(); + String otherStr = (String) argMap.getOrDefault("otherstr", Chart.DEFAULT_OTHER_STR).getValue(); + String nullStr = (String) argMap.getOrDefault("nullstr", Chart.DEFAULT_NULL_STR).getValue(); + String columSplitName = aggregated.getLeft().getRowType().getFieldNames().getLast(); RelBuilder relBuilder = context.relBuilder; // 0: agg; 2: column-split relBuilder.project(relBuilder.field(0), relBuilder.field(2)); - relBuilder.filter(relBuilder.isNotNull(relBuilder.field(1))); + if (!useNull) { + relBuilder.filter(relBuilder.isNotNull(relBuilder.field(1))); + } // 1: column split; 0: agg relBuilder.aggregate( relBuilder.groupKey(relBuilder.field(1)), @@ -2068,11 +2070,13 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { if (top) { grandTotal = relBuilder.desc(grandTotal); } + // Always set it to null last so that it does not interfere with top / bottom calculation + grandTotal = relBuilder.nullsLast(grandTotal); RexNode rowNum = PlanUtils.makeOver( context, BuiltinFunctionName.ROW_NUMBER, - relBuilder.literal(1), + relBuilder.literal(1), // dummy expression for row number calculation List.of(), List.of(), List.of(grandTotal), @@ -2085,26 +2089,29 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // on column-split = group key relBuilder.join( - JoinRelType.INNER, relBuilder.equals(relBuilder.field(2, 0, 2), relBuilder.field(2, 1, 0))); + JoinRelType.LEFT, relBuilder.equals(relBuilder.field(2, 0, 2), relBuilder.field(2, 1, 0))); - RexNode condition = + RexNode colSplitPostJoin = relBuilder.field(2); + RexNode lteCondition = relBuilder.call( SqlStdOperatorTable.LESS_THAN_OR_EQUAL, relBuilder.field("__row_number__"), relBuilder.literal(limit)); + RexNode nullCondition = relBuilder.isNull(colSplitPostJoin); RexNode columnSplitExpr; - if 
(useOther) { - columnSplitExpr = - relBuilder.call( - SqlStdOperatorTable.CASE, - condition, - relBuilder.field(2), - relBuilder.literal(otherStr)); - } else { - relBuilder.filter(condition); - columnSplitExpr = relBuilder.field(2); + if (!useOther) { + relBuilder.filter(lteCondition); } + columnSplitExpr = + relBuilder.call( + SqlStdOperatorTable.CASE, + nullCondition, + relBuilder.literal(nullStr), + lteCondition, + relBuilder.field(2), + relBuilder.literal(otherStr)); + relBuilder.project( relBuilder.field(0), relBuilder.field(1), diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index a6fbc6733db..a4b32241621 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -135,6 +135,8 @@ SHOWCOUNT: 'SHOWCOUNT'; LIMIT: 'LIMIT'; USEOTHER: 'USEOTHER'; OTHERSTR: 'OTHERSTR'; +USENULL: 'USENULL'; +NULLSTR: 'NULLSTR'; INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index d1b81989519..77316e00c98 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -268,6 +268,8 @@ chartOptions : LIMIT EQUAL (TOP | BOTTOM)? 
integerLiteral | USEOTHER EQUAL booleanLiteral | OTHERSTR EQUAL stringLiteral + | USENULL EQUAL booleanLiteral + | NULLSTR EQUAL stringLiteral ; rowSplit diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index f62b82cab40..0b3501d4719 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -159,6 +159,10 @@ public static List getArgumentList(ChartCommandContext ctx) { arguments.add(new Argument("useother", getArgumentValue(optionCtx.booleanLiteral()))); } else if (optionCtx.OTHERSTR() != null) { arguments.add(new Argument("otherstr", getArgumentValue(optionCtx.stringLiteral()))); + } else if (optionCtx.USENULL() != null) { + arguments.add(new Argument("usenull", getArgumentValue(optionCtx.booleanLiteral()))); + } else if (optionCtx.NULLSTR() != null) { + arguments.add(new Argument("nullstr", getArgumentValue(optionCtx.stringLiteral()))); } } return arguments; From 14a7d7c441f40aacdaa05466b24d8ee563aa53cb Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 20 Oct 2025 17:37:11 +0800 Subject: [PATCH 04/23] Append a final aggregation to merge OTHER categories Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 59 +++++++++++-------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index e2acb22d6b2..b4ea0c7db2f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1072,11 +1072,6 @@ private Pair, List> resolveAttributesForAggregation( @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { - return 
visitAggregationAndReturnProjection(node, context).getLeft(); - } - - private Pair> visitAggregationAndReturnProjection( - Aggregation node, CalcitePlanContext context) { visitChildren(node, context); List aggExprList = node.getAggExprList(); @@ -1161,7 +1156,7 @@ private Pair> visitAggregationAndReturnProjection( reordered.addAll(aliasedGroupByList); context.relBuilder.project(reordered); - return Pair.of(context.relBuilder.peek(), reordered); + return context.relBuilder.peek(); } private Optional getTimeSpanField(UnresolvedExpression expr) { @@ -2036,32 +2031,33 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { Stream.of(node.getRowSplit(), node.getColumnSplit()).filter(Objects::nonNull).toList()); span = null; } + Boolean useNull = (Boolean) argMap.getOrDefault("usenull", Chart.DEFAULT_USE_NULL).getValue(); Aggregation aggregation = - new Aggregation(node.getAggregationFunctions(), List.of(), groupExprList, span, List.of()); - Pair> aggregated = - visitAggregationAndReturnProjection(aggregation, context); + new Aggregation( + node.getAggregationFunctions(), + List.of(), + groupExprList, + span, + List.of(new Argument(Argument.BUCKET_NULLABLE, AstDSL.booleanLiteral(useNull)))); + RelNode aggregated = visitAggregation(aggregation, context); + // If row or column split does not present or limit equals 0, this is the same as `stats agg // [group by col]` - Integer limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); if (node.getRowSplit() == null || node.getColumnSplit() == null || Objects.equals(limit, 0)) { - return aggregated.getLeft(); + return aggregated; } Boolean top = (Boolean) argMap.getOrDefault("top", Chart.DEFAULT_TOP).getValue(); Boolean useOther = (Boolean) argMap.getOrDefault("useother", Chart.DEFAULT_USE_OTHER).getValue(); - Boolean useNull = (Boolean) argMap.getOrDefault("usenull", Chart.DEFAULT_USE_NULL).getValue(); String otherStr = (String) argMap.getOrDefault("otherstr", 
Chart.DEFAULT_OTHER_STR).getValue(); String nullStr = (String) argMap.getOrDefault("nullstr", Chart.DEFAULT_NULL_STR).getValue(); - String columSplitName = aggregated.getLeft().getRowType().getFieldNames().getLast(); + String columSplitName = aggregated.getRowType().getFieldNames().getLast(); RelBuilder relBuilder = context.relBuilder; // 0: agg; 2: column-split relBuilder.project(relBuilder.field(0), relBuilder.field(2)); - if (!useNull) { - relBuilder.filter(relBuilder.isNotNull(relBuilder.field(1))); - } // 1: column split; 0: agg relBuilder.aggregate( relBuilder.groupKey(relBuilder.field(1)), @@ -2084,7 +2080,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.projectPlus(relBuilder.alias(rowNum, "__row_number__")); RelNode ranked = relBuilder.build(); - relBuilder.push(aggregated.getLeft()); + relBuilder.push(aggregated); relBuilder.push(ranked); // on column-split = group key @@ -2103,19 +2099,32 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.filter(lteCondition); } - columnSplitExpr = - relBuilder.call( - SqlStdOperatorTable.CASE, - nullCondition, - relBuilder.literal(nullStr), - lteCondition, - relBuilder.field(2), - relBuilder.literal(otherStr)); + if (useNull) { + columnSplitExpr = + relBuilder.call( + SqlStdOperatorTable.CASE, + nullCondition, + relBuilder.literal(nullStr), + lteCondition, + relBuilder.field(2), + relBuilder.literal(otherStr)); + } else { + columnSplitExpr = + relBuilder.call( + SqlStdOperatorTable.CASE, + lteCondition, + relBuilder.field(2), + relBuilder.literal(otherStr)); + } + String aggFieldName = relBuilder.peek().getRowType().getFieldNames().getFirst(); relBuilder.project( relBuilder.field(0), relBuilder.field(1), relBuilder.alias(columnSplitExpr, columSplitName)); + relBuilder.aggregate( + relBuilder.groupKey(relBuilder.field(1), relBuilder.field(2)), + relBuilder.sum(relBuilder.field(0)).as(aggFieldName)); return relBuilder.peek(); } From 
9d19a449b91d94a5fbb86b3d4027967f1a2b231d Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 17 Oct 2025 15:30:51 +0800 Subject: [PATCH 05/23] Handle common agg functions for OTHER category for timechart Signed-off-by: Yuanchun Shen # Conflicts: # core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java --- .../sql/calcite/CalciteRelNodeVisitor.java | 107 ++++++++++++---- .../sql/calcite/remote/CalciteExplainIT.java | 5 +- .../calcite/explain_timechart.yaml | 38 +++--- .../explain_timechart_no_pushdown.yaml | 37 ------ .../explain_timechart.yaml | 39 ++++++ .../rest-api-spec/test/issues/4582.yml | 120 ++++++++++++++++++ .../ppl/calcite/CalcitePPLTimechartTest.java | 8 +- 7 files changed, 266 insertions(+), 88 deletions(-) delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_no_pushdown.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index b4ea0c7db2f..6237eb2dfd5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1971,7 +1971,7 @@ public RelNode visitFlatten(Flatten node, CalcitePlanContext context) { } /** Helper method to get the function name for proper column naming */ - private String getValueFunctionName(UnresolvedExpression aggregateFunction) { + private String getAggFieldAlias(UnresolvedExpression aggregateFunction) { if (aggregateFunction instanceof Alias) { return ((Alias) aggregateFunction).getName(); } @@ -2141,7 +2141,7 @@ public RelNode visitTimechart( // Handle no by field case if 
(node.getByField() == null) { - String valueFunctionName = getValueFunctionName(node.getAggregateFunction()); + String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction()); // Create group expression list with just the timestamp span but use a different alias // to avoid @timestamp naming conflict @@ -2149,7 +2149,7 @@ public RelNode visitTimechart( simpleGroupExprList.add(new Alias("timestamp", spanExpr)); // Create agg expression list with the aggregate function List simpleAggExprList = - List.of(new Alias(valueFunctionName, node.getAggregateFunction())); + List.of(new Alias(aggFieldAlias, node.getAggregateFunction())); // Create an Aggregation object Aggregation aggregation = new Aggregation( @@ -2164,9 +2164,9 @@ public RelNode visitTimechart( context.relBuilder.push(result); // Reorder fields: timestamp first, then count context.relBuilder.project( - context.relBuilder.field("timestamp"), context.relBuilder.field(valueFunctionName)); + context.relBuilder.field("timestamp"), context.relBuilder.field(aggFieldAlias)); // Rename timestamp to @timestamp - context.relBuilder.rename(List.of("@timestamp", valueFunctionName)); + context.relBuilder.rename(List.of("@timestamp", aggFieldAlias)); context.relBuilder.sort(context.relBuilder.field(0)); return context.relBuilder.peek(); @@ -2175,7 +2175,7 @@ public RelNode visitTimechart( // Extract parameters for byField case UnresolvedExpression byField = node.getByField(); String byFieldName = ((Field) byField).getField().toString(); - String valueFunctionName = getValueFunctionName(node.getAggregateFunction()); + String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction()); int limit = Optional.ofNullable(node.getLimit()).orElse(10); boolean useOther = Optional.ofNullable(node.getUseOther()).orElse(true); @@ -2202,11 +2202,11 @@ public RelNode visitTimechart( // Handle no limit case - just sort and return with proper field aliases if (limit == 0) { - // Add final projection with proper aliases: 
[@timestamp, byField, valueFunctionName] + // Add final projection with proper aliases: [@timestamp, byField, aggFieldAlias] context.relBuilder.project( context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), context.relBuilder.alias(context.relBuilder.field(1), byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); context.relBuilder.sort(context.relBuilder.field(0), context.relBuilder.field(1)); return context.relBuilder.peek(); } @@ -2216,32 +2216,61 @@ public RelNode visitTimechart( // Step 2: Find top N categories using window function approach (more efficient than separate // aggregation) - RelNode topCategories = buildTopCategoriesQuery(completeResults, limit, context); + String aggFunctionName = getAggFunctionName(node.getAggregateFunction()); + Optional aggFuncNameOptional = BuiltinFunctionName.of(aggFunctionName); + if (aggFuncNameOptional.isEmpty()) { + throw new IllegalArgumentException( + StringUtils.format("Unrecognized aggregation function: %s", aggFunctionName)); + } + BuiltinFunctionName aggFunction = aggFuncNameOptional.get(); + RelNode topCategories = buildTopCategoriesQuery(completeResults, limit, aggFunction, context); // Step 3: Apply OTHER logic with single pass return buildFinalResultWithOther( - completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context); + completeResults, + topCategories, + byFieldName, + aggFunction, + aggFieldAlias, + useOther, + limit, + context); } catch (Exception e) { throw new RuntimeException("Error in visitTimechart: " + e.getMessage(), e); } } + private String getAggFunctionName(UnresolvedExpression aggregateFunction) { + if (aggregateFunction instanceof Alias alias) { + return getAggFunctionName(alias.getDelegated()); + } + return ((AggregateFunction) aggregateFunction).getFuncName(); + } + /** Build top categories query - simpler approach that works better with 
OTHER handling */ private RelNode buildTopCategoriesQuery( - RelNode completeResults, int limit, CalcitePlanContext context) { + RelNode completeResults, + int limit, + BuiltinFunctionName aggFunction, + CalcitePlanContext context) { context.relBuilder.push(completeResults); // Filter out null values when determining top categories - null should not count towards limit context.relBuilder.filter(context.relBuilder.isNotNull(context.relBuilder.field(1))); // Get totals for non-null categories - field positions: 0=@timestamp, 1=byField, 2=value + RexInputRef valueField = context.relBuilder.field(2); + AggCall call = buildAggCall(context.relBuilder, aggFunction, valueField); + context.relBuilder.aggregate( - context.relBuilder.groupKey(context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as("grand_total")); + context.relBuilder.groupKey(context.relBuilder.field(1)), call.as("grand_total")); // Apply sorting and limit to non-null categories only - context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field("grand_total"))); + RexNode sortField = context.relBuilder.field("grand_total"); + sortField = + aggFunction == BuiltinFunctionName.MIN ? 
sortField : context.relBuilder.desc(sortField); + context.relBuilder.sort(sortField); if (limit > 0) { context.relBuilder.limit(0, limit); } @@ -2253,18 +2282,25 @@ private RelNode buildFinalResultWithOther( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + BuiltinFunctionName aggFunction, + String aggFieldAlias, boolean useOther, int limit, CalcitePlanContext context) { // Use zero-filling for count aggregations, standard result for others - if (valueFunctionName.equals("count")) { + if (aggFieldAlias.equals("count")) { return buildZeroFilledResult( - completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context); + completeResults, topCategories, byFieldName, aggFieldAlias, useOther, limit, context); } else { return buildStandardResult( - completeResults, topCategories, byFieldName, valueFunctionName, useOther, context); + completeResults, + topCategories, + byFieldName, + aggFunction, + aggFieldAlias, + useOther, + context); } } @@ -2273,7 +2309,8 @@ private RelNode buildStandardResult( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + BuiltinFunctionName aggFunctionName, + String aggFieldAlias, boolean useOther, CalcitePlanContext context) { @@ -2296,11 +2333,13 @@ private RelNode buildStandardResult( context.relBuilder.project( context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), context.relBuilder.alias(categoryExpr, byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); + RexInputRef valueField = context.relBuilder.field(2); + AggCall aggCall = buildAggCall(context.relBuilder, aggFunctionName, valueField); context.relBuilder.aggregate( context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as(valueFunctionName)); + 
aggCall.as(aggFieldAlias)); applyFiltersAndSort(useOther, context); return context.relBuilder.peek(); @@ -2335,7 +2374,7 @@ private RelNode buildZeroFilledResult( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + String aggFieldAlias, boolean useOther, int limit, CalcitePlanContext context) { @@ -2374,7 +2413,7 @@ private RelNode buildZeroFilledResult( context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), "@timestamp"), context.relBuilder.alias(context.relBuilder.field(1), byFieldName), - context.relBuilder.alias(context.relBuilder.literal(0), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.literal(0), aggFieldAlias)); RelNode zeroFilledCombinations = context.relBuilder.build(); // Get actual results with OTHER logic applied @@ -2396,7 +2435,7 @@ private RelNode buildZeroFilledResult( context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), "@timestamp"), context.relBuilder.alias(actualCategoryExpr, byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); context.relBuilder.aggregate( context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), @@ -2411,12 +2450,30 @@ private RelNode buildZeroFilledResult( // Aggregate to combine actual and zero-filled data context.relBuilder.aggregate( context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as(valueFunctionName)); + context.relBuilder.sum(context.relBuilder.field(2)).as(aggFieldAlias)); applyFiltersAndSort(useOther, context); return context.relBuilder.peek(); } + /** + * Aggregate a field based on a given built-in aggregation function name. + * + *

It is intended for secondary aggregations in timechart and chart commands. Using it + * elsewhere may lead to unintended results. It handles explicitly only MIN, MAX, AVG, EARLIEST, + * and LATEST. Every other aggregation type (including COUNT and DISTINCT_COUNT) is summed, + * on the assumption that its partial results are additive. + */ + private AggCall buildAggCall( + RelBuilder relBuilder, BuiltinFunctionName aggFunction, RexNode node) { + return switch (aggFunction) { + case MIN, EARLIEST -> relBuilder.min(node); + case MAX, LATEST -> relBuilder.max(node); + case AVG -> relBuilder.avg(node); + default -> relBuilder.sum(node); + }; + } + @Override public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 15087d5d010..fe3e5e9224b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -422,10 +422,7 @@ public void testExplainWithTimechartAvg() throws IOException { @Test public void testExplainWithTimechartAvg() throws IOException { var result = explainQueryYaml("source=events | timechart span=1m avg(cpu_usage) by host"); - String expected = - !isPushdownDisabled() - ?
loadFromFile("expectedOutput/calcite/explain_timechart.yaml") - : loadFromFile("expectedOutput/calcite/explain_timechart_no_pushdown.yaml"); + String expected = loadExpectedPlan("explain_timechart.yaml"); assertYamlEqualsIgnoreId(expected, result); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml index f212b4c8bfd..a315860aac9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) + LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[AVG($2)]) LogicalProject(@timestamp=[$0], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) @@ -10,7 +10,7 @@ calcite: LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) + LogicalAggregate(group=[{1}], grand_total=[AVG($2)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) @@ -19,19 +19,21 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], 
expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) - EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) - EnumerableSort(sort0=[$1], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{0}], grand_total=[SUM($1)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], 
avg(cpu_usage)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[10]) + EnumerableSort(sort0=[$1], dir0=[DESC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], host=[$t0], grand_total=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, 
@timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_no_pushdown.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_no_pushdown.yaml deleted file mode 100644 index ae966d7eea7..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_no_pushdown.yaml +++ /dev/null @@ -1,37 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) - LogicalProject(@timestamp=[$0], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) - LogicalJoin(condition=[=($1, $3)], joinType=[left]) - LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) - LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) - LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) - LogicalFilter(condition=[IS NOT NULL($1)]) - LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) - LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) - LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - physical: | - EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], 
expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) - EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) - EnumerableSort(sort0=[$1], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) - EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{0}], grand_total=[SUM($2)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], expr#9=[IS NOT NULL($t0)], proj#0..1=[{exprs}], $f2=[$t8], $condition=[$t9]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml new file mode 100644 index 00000000000..5aa55ca656b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml @@ -0,0 +1,39 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[AVG($2)]) + 
LogicalProject(@timestamp=[$0], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) + LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) + LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) + LogicalAggregate(group=[{1}], grand_total=[AVG($2)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) + LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) + LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(cpu_usage)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) + 
EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[10]) + EnumerableSort(sort0=[$1], dir0=[DESC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], host=[$t0], grand_total=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], expr#9=[IS NOT NULL($t0)], proj#0..1=[{exprs}], $f2=[$t8], $condition=[$t9]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml new file mode 100644 index 00000000000..d1ecb079f3a --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml @@ -0,0 +1,120 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + - do: + indices.create: + index: test_timechart_4582 + body: + mappings: + properties: + "@timestamp": + type: date_nanos + severityNumber: + type: long + severityText: + type: keyword + body: + type: text + - do: + bulk: + index: test_timechart_4582 + refresh: true + body: + - '{"index": {}}' + - '{"@timestamp": "2024-01-15T10:30:04.567890123Z", "severityNumber": 9, "severityText": "INFO", "body": "Info message"}' + - '{"index": {}}' + - '{"@timestamp": "2024-01-15T10:30:05.567890123Z", "severityNumber": 13, "severityText": "WARN", "body": "Warning message"}' + - '{"index": {}}' + - '{"@timestamp": "2024-01-15T10:30:06.567890123Z", "severityNumber": 17, "severityText": "ERROR", "body": "Error message"}' + - 
'{"index": {}}' + - '{"@timestamp": "2024-01-15T10:30:07.567890123Z", "severityNumber": 21, "severityText": "FATAL", "body": "Fatal message"}' + - '{"index": {}}' + - '{"@timestamp": "2024-01-15T10:30:08.567890123Z", "severityNumber": 24, "severityText": "FATAL4", "body": "Fatal4 message"}' + - '{"index": {}}' + - '{"@timestamp": "2024-01-15T10:30:09.567890123Z", "severityNumber": 23, "severityText": "DEBUG", "body": "Debug message"}' + - '{"index": {}}' + - '{"@timestamp": "2024-01-15T10:30:10.567890123Z", "severityNumber": 20, "severityText": "TRACE", "body": "Trace message"}' + - '{"index": {}}' + - '{"@timestamp": "2024-01-15T10:30:11.567890123Z", "severityNumber": 22, "severityText": "CUSTOM", "body": "Custom message"}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"timechart max aggregation with limit should not sum OTHER values": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_timechart_4582 | timechart limit=1 span=10seconds max(severityNumber) by severityText + + - match: { total: 3 } + - match: { "schema": [{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "max(severityNumber)", "type": "bigint"}] } + - match: { "datarows": [["2024-01-15 10:30:00", "FATAL4", 24], ["2024-01-15 10:30:00", "OTHER", 23], ["2024-01-15 10:30:10", "OTHER",22]] } + +--- +"timechart min aggregation with limit should not sum OTHER values": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_timechart_4582 | timechart limit=2 span=1d min(severityNumber) by severityText + + - match: { total: 3 } + - match: { "schema": [{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "min(severityNumber)", "type": "bigint"}] } + - match: { "datarows": 
[["2024-01-15 00:00:00", "INFO", 9], ["2024-01-15 00:00:00", "OTHER", 17], ["2024-01-15 00:00:00", "WARN", 13]] } + +--- +"timechart earliest aggregation with limit should not sum OTHER values": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_timechart_4582 | timechart limit=2 span=30seconds earliest(@timestamp) by severityText + + - match: { total: 3 } + - match: { "schema": [{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "earliest(@timestamp)", "type": "timestamp"}] } + - match: { "datarows": [ + ["2024-01-15 10:30:00", "CUSTOM", "2024-01-15 10:30:11.567890123"], + ["2024-01-15 10:30:00", "OTHER", "2024-01-15 10:30:04.567890123"], + ["2024-01-15 10:30:00", "TRACE", "2024-01-15 10:30:10.567890123"]] } + +--- +"timechart count aggregation with limit should sum OTHER values": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_timechart_4582 | timechart limit=3 span=1min count() by severityText + + - match: { total: 4 } + - match: { "schema": [{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "count", "type": "bigint"}] } + - match: { "datarows": [["2024-01-15 10:30:00", "CUSTOM", 1], ["2024-01-15 10:30:00", "DEBUG", 1], ["2024-01-15 10:30:00", "ERROR", 1], ["2024-01-15 10:30:00", "OTHER", 5]] } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java index 6e03447e243..ca8ddd3e39c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java @@ -218,13 +218,13 @@ public void testTimechartWithSpan1m() { RelNode root = getRelNode(ppl); String expectedSparkSql = 
"SELECT `t1`.`@timestamp`, CASE WHEN `t7`.`region` IS NOT NULL THEN `t1`.`region` ELSE CASE" - + " WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END `region`, SUM(`t1`.`$f2`)" + + " WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END `region`, AVG(`t1`.`$f2`)" + " `avg(cpu_usage)`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t1`\n" - + "LEFT JOIN (SELECT `region`, SUM(`$f2`) `grand_total`\n" + + "LEFT JOIN (SELECT `region`, AVG(`$f2`) `grand_total`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" @@ -255,13 +255,13 @@ public void testTimechartWithLimitAndUseOtherFalse() { RelNode root = getRelNode(ppl); String expectedSparkSql = "SELECT `t1`.`@timestamp`, CASE WHEN `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE" - + " WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `host`, SUM(`t1`.`$f2`)" + + " WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `host`, AVG(`t1`.`$f2`)" + " `avg(cpu_usage)`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t1`\n" - + "LEFT JOIN (SELECT `host`, SUM(`$f2`) `grand_total`\n" + + "LEFT JOIN (SELECT `host`, AVG(`$f2`) `grand_total`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" From ddccedce89029e336b57ffdd401d13b7f4410a90 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 17 Oct 2025 16:52:20 +0800 Subject: [PATCH 06/23] Fix timechart IT Signed-off-by: Yuanchun Shen --- .../remote/CalciteTimechartCommandIT.java | 30 +++++-------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git 
a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java index 3b4ca27dab5..4d9352e9e87 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java @@ -183,27 +183,13 @@ public void testTimechartWithLimit() throws IOException { schema("host", "string"), schema("avg(cpu_usage)", "double")); - // Verify we have rows for web-01, web-02, and OTHER - boolean foundWeb01 = false; - boolean foundWeb02 = false; - boolean foundOther = false; - - for (int i = 0; i < result.getJSONArray("datarows").length(); i++) { - Object[] row = result.getJSONArray("datarows").getJSONArray(i).toList().toArray(); - String label = (String) row[1]; - - if ("web-01".equals(label)) { - foundWeb01 = true; - } else if ("web-02".equals(label)) { - foundWeb02 = true; - } else if ("OTHER".equals(label)) { - foundOther = true; - } - } - - assertTrue("web-01 not found in results", foundWeb01); - assertTrue("web-02 not found in results", foundWeb02); - assertTrue("OTHER category not found in results", foundOther); + verifyDataRows( + result, + rows("2024-07-01 00:00:00", "web-01", 45.2), + rows("2024-07-01 00:01:00", "OTHER", 38.7), + rows("2024-07-01 00:02:00", "web-01", 55.3), + rows("2024-07-01 00:03:00", "db-01", 42.1), + rows("2024-07-01 00:04:00", "OTHER", 41.8)); } @Test @@ -383,7 +369,7 @@ public void testTimechartWithLimitAndUseOther() throws IOException { if ("OTHER".equals(host)) { foundOther = true; - assertEquals(330.4, cpuUsage, 0.1); + assertEquals(41.3, cpuUsage, 0.1); } else if ("web-03".equals(host)) { foundWeb03 = true; assertEquals(55.3, cpuUsage, 0.1); From 226e3c10b215b8ef74db347c9499b59a9334b5e8 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 21 Oct 2025 12:41:30 +0800 Subject: [PATCH 07/23] Sort earliest 
results with asc order Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/calcite/CalciteRelNodeVisitor.java | 5 ++++- .../resources/rest-api-spec/test/issues/4582.yml | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 6237eb2dfd5..aa920f076ad 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2268,8 +2268,11 @@ private RelNode buildTopCategoriesQuery( // Apply sorting and limit to non-null categories only RexNode sortField = context.relBuilder.field("grand_total"); + // For MIN and EARLIEST, top results should be the minimum ones sortField = - aggFunction == BuiltinFunctionName.MIN ? sortField : context.relBuilder.desc(sortField); + aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST + ? 
sortField + : context.relBuilder.desc(sortField); context.relBuilder.sort(sortField); if (limit > 0) { context.relBuilder.limit(0, limit); diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml index d1ecb079f3a..27973484d6c 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml @@ -98,9 +98,9 @@ teardown: - match: { total: 3 } - match: { "schema": [{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "earliest(@timestamp)", "type": "timestamp"}] } - match: { "datarows": [ - ["2024-01-15 10:30:00", "CUSTOM", "2024-01-15 10:30:11.567890123"], - ["2024-01-15 10:30:00", "OTHER", "2024-01-15 10:30:04.567890123"], - ["2024-01-15 10:30:00", "TRACE", "2024-01-15 10:30:10.567890123"]] } + ["2024-01-15 10:30:00", "INFO", "2024-01-15 10:30:04.567890123"], + ["2024-01-15 10:30:00", "OTHER", "2024-01-15 10:30:06.567890123"], + ["2024-01-15 10:30:00", "WARN", "2024-01-15 10:30:05.567890123"]] } --- "timechart count aggregation with limit should sum OTHER values": From 9eb31807dc0796cb7254822d60b6a16c2a68e693 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 22 Oct 2025 16:15:52 +0800 Subject: [PATCH 08/23] Support non-string fields as column split Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index aa920f076ad..6747a6e0ec2 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -62,6 +62,7 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; 
import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilder.AggCall; import org.apache.calcite.util.Holder; @@ -2039,7 +2040,22 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { groupExprList, span, List.of(new Argument(Argument.BUCKET_NULLABLE, AstDSL.booleanLiteral(useNull)))); - RelNode aggregated = visitAggregation(aggregation, context); + visitAggregation(aggregation, context); + + // Convert the column split to string if necessary: column split was supposed to be pivoted to + // column names. This guarantees that its type being compatible with useother and usenull + RelBuilder relBuilder = context.relBuilder; + RexNode colSplit = relBuilder.field(2); + String columSplitName = relBuilder.peek().getRowType().getFieldNames().getLast(); + if (!SqlTypeUtil.isCharacter(colSplit.getType())) { + colSplit = + relBuilder.alias( + context.rexBuilder.makeCast( + UserDefinedFunctionUtils.NULLABLE_STRING, colSplit, true, true), + columSplitName); + } + relBuilder.project(relBuilder.field(0), relBuilder.field(1), colSplit); + RelNode aggregated = relBuilder.peek(); // If row or column split does not present or limit equals 0, this is the same as `stats agg // [group by col]` @@ -2054,8 +2070,6 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { String otherStr = (String) argMap.getOrDefault("otherstr", Chart.DEFAULT_OTHER_STR).getValue(); String nullStr = (String) argMap.getOrDefault("nullstr", Chart.DEFAULT_NULL_STR).getValue(); - String columSplitName = aggregated.getRowType().getFieldNames().getLast(); - RelBuilder relBuilder = context.relBuilder; // 0: agg; 2: column-split relBuilder.project(relBuilder.field(0), relBuilder.field(2)); // 1: column split; 0: agg From d301ff0c003fa0d7e11af7cc726da937a333307f Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 
23 Oct 2025 13:13:53 +0800 Subject: [PATCH 09/23] Fix min/earliest order & fix non-accumulative agg for chart Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 62 +++++++++---------- .../calcite/remote/CalciteChartCommandIT.java | 62 +++++++++++++++++++ 2 files changed, 91 insertions(+), 33 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 6747a6e0ec2..3ac7d40d307 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2013,37 +2013,35 @@ private String getAggFieldAlias(UnresolvedExpression aggregateFunction) { public RelNode visitChart(Chart node, CalcitePlanContext context) { visitChildren(node, context); ArgumentMap argMap = ArgumentMap.of(node.getArguments()); - List groupExprList = new ArrayList<>(); - UnresolvedExpression span; - if (node.getColumnSplit() instanceof Span && node.getRowSplit() instanceof Span) { - throw new UnsupportedOperationException("It is not supported to have two span splits"); - } else if (node.getRowSplit() instanceof Span) { - if (node.getColumnSplit() != null) { - groupExprList.add(node.getColumnSplit()); - } - span = node.getRowSplit(); - } else if (node.getColumnSplit() instanceof Span) { - if (node.getRowSplit() != null) { - groupExprList.add(node.getRowSplit()); - } - span = node.getColumnSplit(); - } else { - groupExprList.addAll( - Stream.of(node.getRowSplit(), node.getColumnSplit()).filter(Objects::nonNull).toList()); - span = null; - } + List groupExprList = + Stream.of(node.getRowSplit(), node.getColumnSplit()).filter(Objects::nonNull).toList(); Boolean useNull = (Boolean) argMap.getOrDefault("usenull", Chart.DEFAULT_USE_NULL).getValue(); Aggregation 
aggregation = new Aggregation( node.getAggregationFunctions(), List.of(), groupExprList, - span, + null, List.of(new Argument(Argument.BUCKET_NULLABLE, AstDSL.booleanLiteral(useNull)))); - visitAggregation(aggregation, context); + RelNode aggregated = visitAggregation(aggregation, context); + + // If row or column split does not present or limit equals 0, this is the same as `stats agg + // [group by col]` + Integer limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); + if (node.getRowSplit() == null || node.getColumnSplit() == null || Objects.equals(limit, 0)) { + return aggregated; + } + + String aggFunctionName = getAggFunctionName(node.getAggregationFunctions().getFirst()); + Optional aggFuncNameOptional = BuiltinFunctionName.of(aggFunctionName); + if (aggFuncNameOptional.isEmpty()) { + throw new IllegalArgumentException( + StringUtils.format("Unrecognized aggregation function: %s", aggFunctionName)); + } + BuiltinFunctionName aggFunction = aggFuncNameOptional.get(); // Convert the column split to string if necessary: column split was supposed to be pivoted to - // column names. This guarantees that its type being compatible with useother and usenull + // column names. 
This guarantees that its type compatibility with useother and usenull RelBuilder relBuilder = context.relBuilder; RexNode colSplit = relBuilder.field(2); String columSplitName = relBuilder.peek().getRowType().getFieldNames().getLast(); @@ -2055,14 +2053,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { columSplitName); } relBuilder.project(relBuilder.field(0), relBuilder.field(1), colSplit); - RelNode aggregated = relBuilder.peek(); - - // If row or column split does not present or limit equals 0, this is the same as `stats agg - // [group by col]` - Integer limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); - if (node.getRowSplit() == null || node.getColumnSplit() == null || Objects.equals(limit, 0)) { - return aggregated; - } + aggregated = relBuilder.peek(); Boolean top = (Boolean) argMap.getOrDefault("top", Chart.DEFAULT_TOP).getValue(); Boolean useOther = @@ -2075,11 +2066,16 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // 1: column split; 0: agg relBuilder.aggregate( relBuilder.groupKey(relBuilder.field(1)), - relBuilder.sum(relBuilder.field(0)).as("__grand_total__")); // results: group key, agg calls + buildAggCall(context.relBuilder, aggFunction, relBuilder.field(0)) + .as("__grand_total__")); // results: group key, agg calls RexNode grandTotal = relBuilder.field("__grand_total__"); - if (top) { + // Apply sorting: for MIN/EARLIEST, reverse the top/bottom logic + boolean smallestFirst = + aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST; + if (top != smallestFirst) { grandTotal = relBuilder.desc(grandTotal); } + // Always set it to null last so that it does not interfere with top / bottom calculation grandTotal = relBuilder.nullsLast(grandTotal); RexNode rowNum = @@ -2138,7 +2134,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.alias(columnSplitExpr, columSplitName)); relBuilder.aggregate( 
relBuilder.groupKey(relBuilder.field(1), relBuilder.field(2)), - relBuilder.sum(relBuilder.field(0)).as(aggFieldName)); + buildAggCall(context.relBuilder, aggFunction, relBuilder.field(0)).as(aggFieldName)); return relBuilder.peek(); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java new file mode 100644 index 00000000000..b326f4ea073 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java @@ -0,0 +1,62 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +import java.io.IOException; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +public class CalciteChartCommandIT extends PPLIntegTestCase { + @Override + protected void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK); + loadIndex(Index.BANK_WITH_NULL_VALUES); + loadIndex(Index.OTELLOGS); + } + + @Test + public void testChartWithSingleGroupKey() throws IOException { + JSONObject result1 = executeQuery(String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK)); + verifySchema( + result1, + schema("avg(balance)", "double"), + schema("gender", "string")); + verifyDataRows(result1, rows(40488, "F"), rows(16377.25, "M")); + JSONObject result2 = executeQuery(String.format("source=%s | chart avg(balance) over gender", TEST_INDEX_BANK)); + 
assertJsonEquals(result1.toString(), result2.toString()); + } + + @Test + public void testChartWithMultipleGroupKeys() throws IOException { + JSONObject result1 = executeQuery(String.format("source=%s | chart avg(balance) by gender, age", TEST_INDEX_BANK)); + verifySchema( + result1, + schema("avg(balance)", "double"), + schema("gender", "string"), + schema("age", "string")); + verifyDataRows(result1, rows(40488, "F", "36"), rows(16377.25, "M", 36)); + JSONObject result2 = executeQuery(String.format("source=%s | chart avg(balance) over gender, age", TEST_INDEX_BANK)); + assertJsonEquals(result1.toString(), result2.toString()); + } + + // TODOs: + // Param nullstr: source=opensearch-sql_test_index_bank_with_null_values | eval age = cast(age as string) | chart nullstr='nil' max(account_number) over gender by age + // Param usenull: source=opensearch-sql_test_index_bank_with_null_values | eval age = cast(age as string) | chart usenull=false nullstr='nil' max(account_number) over gender by age + // Param limit = 0: source=bank | chart limit=0 avg(balance) over state by gender + // SPAN: + +} From b10608d87f88864749f42b856599d997630aca4c Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 23 Oct 2025 16:25:24 +0800 Subject: [PATCH 10/23] Hint non-null in aggregateWithTrimming Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 47 ++++++++++--------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 3ac7d40d307..0c011352539 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -896,12 +896,14 @@ private boolean isCountField(RexCall call) { * @param groupExprList group by expression list * @param aggExprList aggregate expression list * @param context 
CalcitePlanContext + * @param hintBucketNonNull adda bucket nullable hint on LogicalAggregate if set * @return Pair of (group-by list, field list, aggregate list) */ private Pair, List> aggregateWithTrimming( List groupExprList, List aggExprList, - CalcitePlanContext context) { + CalcitePlanContext context, + boolean hintBucketNonNull) { Pair, List> resolved = resolveAttributesForAggregation(groupExprList, aggExprList, context); List resolvedGroupByList = resolved.getLeft(); @@ -1005,6 +1007,7 @@ private Pair, List> aggregateWithTrimming( List intendedGroupKeyAliases = getGroupKeyNamesAfterAggregation(reResolved.getLeft()); context.relBuilder.aggregate( context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight()); + if (hintBucketNonNull) hintBucketNonNullOnAggregate(context.relBuilder); // During aggregation, Calcite projects both input dependencies and output group-by fields. // When names conflict, Calcite adds numeric suffixes (e.g., "value0"). // Apply explicit renaming to restore the intended aliases. @@ -1013,6 +1016,24 @@ private Pair, List> aggregateWithTrimming( return Pair.of(reResolved.getLeft(), reResolved.getRight()); } + private void hintBucketNonNullOnAggregate(RelBuilder relBuilder) { + final RelHint statHits = + RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); + assert relBuilder.peek() instanceof LogicalAggregate + : "Stats hits should be added to LogicalAggregate"; + relBuilder.hints(statHits); + relBuilder + .getCluster() + .setHintStrategies( + HintStrategyTable.builder() + .hintStrategy( + "stats_args", + (hint, rel) -> { + return rel instanceof LogicalAggregate; + }) + .build()); + } + /** * Imitates {@code Registrar.registerExpression} of {@link RelBuilder} to derive the output order * of group-by keys after aggregation. 
@@ -1114,25 +1135,7 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { } Pair, List> aggregationAttributes = - aggregateWithTrimming(groupExprList, aggExprList, context); - if (toAddHintsOnAggregate) { - final RelHint statHits = - RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); - assert context.relBuilder.peek() instanceof LogicalAggregate - : "Stats hits should be added to LogicalAggregate"; - context.relBuilder.hints(statHits); - context - .relBuilder - .getCluster() - .setHintStrategies( - HintStrategyTable.builder() - .hintStrategy( - "stats_args", - (hint, rel) -> { - return rel instanceof LogicalAggregate; - }) - .build()); - } + aggregateWithTrimming(groupExprList, aggExprList, context, toAddHintsOnAggregate); // schema reordering // As an example, in command `stats count() by colA, colB`, @@ -1869,7 +1872,7 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) { groupExprList.addAll(fieldList); List aggExprList = List.of(AstDSL.alias(countFieldName, AstDSL.aggregate("count", null))); - aggregateWithTrimming(groupExprList, aggExprList, context); + aggregateWithTrimming(groupExprList, aggExprList, context, false); // 2. 
add a window column List partitionKeys = rexVisitor.analyze(node.getGroupExprList(), context); @@ -2193,7 +2196,7 @@ public RelNode visitTimechart( try { // Step 1: Initial aggregation - IMPORTANT: order is [spanExpr, byField] groupExprList = Arrays.asList(spanExpr, byField); - aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context); + aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context, false); // First rename the timestamp field (2nd to last) to @timestamp List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); From b14920d1ca7a9ab8fbbcbf8b45625de08435ca54 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 23 Oct 2025 16:29:28 +0800 Subject: [PATCH 11/23] Add integration tests for chart command Signed-off-by: Yuanchun Shen --- .../calcite/remote/CalciteChartCommandIT.java | 325 +++++++++++++++--- 1 file changed, 280 insertions(+), 45 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java index b326f4ea073..ce6a63e3c24 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java @@ -5,58 +5,293 @@ package org.opensearch.sql.calcite.remote; -import org.json.JSONObject; -import org.junit.jupiter.api.Test; -import org.opensearch.sql.ppl.PPLIntegTestCase; - -import java.io.IOException; - import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; import static 
org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + public class CalciteChartCommandIT extends PPLIntegTestCase { - @Override - protected void init() throws Exception { - super.init(); - enableCalcite(); - loadIndex(Index.BANK); - loadIndex(Index.BANK_WITH_NULL_VALUES); - loadIndex(Index.OTELLOGS); - } - - @Test - public void testChartWithSingleGroupKey() throws IOException { - JSONObject result1 = executeQuery(String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK)); - verifySchema( - result1, - schema("avg(balance)", "double"), - schema("gender", "string")); - verifyDataRows(result1, rows(40488, "F"), rows(16377.25, "M")); - JSONObject result2 = executeQuery(String.format("source=%s | chart avg(balance) over gender", TEST_INDEX_BANK)); - assertJsonEquals(result1.toString(), result2.toString()); - } - - @Test - public void testChartWithMultipleGroupKeys() throws IOException { - JSONObject result1 = executeQuery(String.format("source=%s | chart avg(balance) by gender, age", TEST_INDEX_BANK)); - verifySchema( - result1, - schema("avg(balance)", "double"), - schema("gender", "string"), - schema("age", "string")); - verifyDataRows(result1, rows(40488, "F", "36"), rows(16377.25, "M", 36)); - JSONObject result2 = executeQuery(String.format("source=%s | chart avg(balance) over gender, age", TEST_INDEX_BANK)); - assertJsonEquals(result1.toString(), result2.toString()); - } - - // TODOs: - // Param nullstr: source=opensearch-sql_test_index_bank_with_null_values | eval age = cast(age as string) | chart nullstr='nil' max(account_number) over gender by age - // Param usenull: source=opensearch-sql_test_index_bank_with_null_values | eval 
age = cast(age as string) | chart usenull=false nullstr='nil' max(account_number) over gender by age - // Param limit = 0: source=bank | chart limit=0 avg(balance) over state by gender - // SPAN: + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK); + loadIndex(Index.BANK_WITH_NULL_VALUES); + loadIndex(Index.OTELLOGS); + loadIndex(Index.TIME_TEST_DATA); + } + + @Test + public void testChartWithSingleGroupKey() throws IOException { + JSONObject result1 = + executeQuery(String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK)); + verifySchema(result1, schema("avg(balance)", "double"), schema("gender", "string")); + verifyDataRows(result1, rows(40488, "F"), rows(16377.25, "M")); + JSONObject result2 = + executeQuery(String.format("source=%s | chart avg(balance) over gender", TEST_INDEX_BANK)); + assertJsonEquals(result1.toString(), result2.toString()); + } + + @Test + public void testChartWithMultipleGroupKeys() throws IOException { + JSONObject result1 = + executeQuery( + String.format("source=%s | chart avg(balance) over gender by age", TEST_INDEX_BANK)); + verifySchema( + result1, + schema("gender", "string"), + schema("age", "string"), + schema("avg(balance)", "double")); + verifyDataRows( + result1, + rows("F", "28", 32838), + rows("F", "39", 40540), + rows("M", "32", 39225), + rows("M", "33", 4180), + rows("M", "36", 11052), + rows("F", "34", 48086)); + JSONObject result2 = + executeQuery( + String.format("source=%s | chart avg(balance) by gender, age", TEST_INDEX_BANK)); + assertJsonEquals(result1.toString(), result2.toString()); + } + + @Test + public void testChartCombineOverByWithLimit0() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=0 avg(balance) over state by gender", TEST_INDEX_BANK)); + verifySchema( + result, + schema("avg(balance)", "double"), + schema("state", "string"), + schema("gender", "string")); + verifyDataRows( 
+ result, + rows(39225.0, "IL", "M"), + rows(48086.0, "IN", "F"), + rows(4180.0, "MD", "M"), + rows(40540.0, "PA", "F"), + rows(5686.0, "TN", "M"), + rows(32838.0, "VA", "F"), + rows(16418.0, "WA", "M")); + } + + @Test + public void testChartMaxBalanceByAgeSpan() throws IOException { + JSONObject result = + executeQuery( + String.format("source=%s | chart max(balance) by age span=10", TEST_INDEX_BANK)); + verifySchema(result, schema("max(balance)", "bigint"), schema("age", "int")); + verifyDataRows(result, rows(32838, 20), rows(48086, 30)); + } + + @Test + public void testChartMaxValueOverTimestampSpanWeekByCategory() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) over timestamp span=1week by category", + TEST_INDEX_TIME_DATA)); + verifySchema( + result, + schema("timestamp", "timestamp"), + schema("category", "string"), + schema("max(value)", "int")); + // Data spans from 2025-07-28 to 2025-08-01, all within same week + verifyDataRows( + result, + rows("2025-07-28 00:00:00", "A", 9367), + rows("2025-07-28 00:00:00", "B", 9521), + rows("2025-07-28 00:00:00", "C", 9187), + rows("2025-07-28 00:00:00", "D", 8736)); + } + + @Test + public void testChartMaxValueOverCategoryByTimestampSpanWeek() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) over category by timestamp span=1week", + TEST_INDEX_TIME_DATA)); + verifySchema( + result, + schema("category", "string"), + schema("timestamp", "string"), + schema("max(value)", "int")); + // All data within same week span + verifyDataRows( + result, + rows("A", "2025-07-28 00:00:00", 9367), + rows("B", "2025-07-28 00:00:00", 9521), + rows("C", "2025-07-28 00:00:00", 9187), + rows("D", "2025-07-28 00:00:00", 8736)); + } + + @Test + public void testChartMaxValueByTimestampSpanDayAndWeek() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) by timestamp 
span=1day, @timestamp span=2weeks", + TEST_INDEX_TIME_DATA)); + // column split are converted to string in order to be compatible with nullstr and otherstr + verifySchema( + result, + schema("timestamp", "timestamp"), + schema("@timestamp", "string"), + schema("max(value)", "int")); + // Data grouped by day spans + verifyDataRows( + result, + rows("2025-07-28 00:00:00", "2025-07-28 00:00:00", 9367), + rows("2025-07-29 00:00:00", "2025-07-28 00:00:00", 9521), + rows("2025-07-30 00:00:00", "2025-07-28 00:00:00", 9234), + rows("2025-07-31 00:00:00", "2025-07-28 00:00:00", 9318), + rows("2025-08-01 00:00:00", "2025-07-28 00:00:00", 9015)); + } + + @Test + public void testChartLimit0WithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=0 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("max(severityNumber)", "bigint"), + schema("flags", "bigint"), + schema("severityText", "string")); + verifyDataRows( + result, + rows(5, 0, "DEBUG"), + rows(6, 0, "DEBUG2"), + rows(7, 0, "DEBUG3"), + rows(8, 0, "DEBUG4"), + rows(17, 0, "ERROR"), + rows(18, 0, "ERROR2"), + rows(19, 0, "ERROR3"), + rows(20, 0, "ERROR4"), + rows(21, 0, "FATAL"), + rows(22, 0, "FATAL2"), + rows(23, 0, "FATAL3"), + rows(24, 0, "FATAL4"), + rows(9, 0, "INFO"), + rows(10, 0, "INFO2"), + rows(11, 0, "INFO3"), + rows(12, 0, "INFO4"), + rows(2, 0, "TRACE2"), + rows(3, 0, "TRACE3"), + rows(4, 0, "TRACE4"), + rows(13, 0, "WARN"), + rows(14, 0, "WARN2"), + rows(15, 0, "WARN3"), + rows(16, 0, "WARN4"), + rows(17, 1, "ERROR"), + rows(9, 1, "INFO"), + rows(1, 1, "TRACE")); + } + + @Test + public void testChartLimitTopWithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=top 2 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + 
TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); + verifyDataRows( + result, + rows(1, "max_among_other", 17), + rows(0, "max_among_other", 22), + rows(0, "FATAL3", 23), + rows(0, "FATAL4", 24)); + } + + @Test + public void testChartLimitBottomWithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=bottom 2 useother=false otherstr='other_small_not_shown'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); + verifyDataRows(result, rows(1, "TRACE", 1), rows(0, "TRACE2", 2)); + } + + @Test + public void testChartLimitTopWithMinAgg() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=top 2 min(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("min(severityNumber)", "bigint")); + verifyDataRows( + result, + rows(1, "OTHER", 9), + rows(1, "TRACE", 1), + rows(0, "OTHER", 3), + rows(0, "TRACE2", 2)); + } + + @Test + public void testChartUseNullTrueWithNullStr() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart nullstr='nil' avg(balance) over gender by age span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema( + result, + schema("gender", "string"), + schema("age", "string"), + schema("avg(balance)", "double")); + verifyDataRows( + result, + rows("M", "30", 21702.5), + rows("F", "30", 48086.0), + rows("F", "20", 32838.0), + rows("F", "nil", null)); + } + @Test + public void testChartUseNullFalseWithNullStr() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart usenull=false 
nullstr='not_shown' count() over gender by age" + + " span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema( + result, schema("gender", "string"), schema("age", "string"), schema("count()", "bigint")); + verifyDataRows(result, rows("M", "30", 4), rows("F", "30", 1), rows("F", "20", 1)); + } } From 2332e2407138b73d4e434ba304e8cb203610412b Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 23 Oct 2025 18:19:39 +0800 Subject: [PATCH 12/23] Add unit tests Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/chart.rst | 0 .../sql/ppl/parser/AstBuilderTest.java | 92 +++++++++++++++++++ .../ppl/parser/AstExpressionBuilderTest.java | 63 +++++++++++++ .../sql/ppl/utils/ArgumentFactoryTest.java | 38 ++++++++ 4 files changed, 193 insertions(+) create mode 100644 docs/user/ppl/cmd/chart.rst diff --git a/docs/user/ppl/cmd/chart.rst b/docs/user/ppl/cmd/chart.rst new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index b9948e6abe2..a3cca06fbc7 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -66,6 +66,7 @@ import org.mockito.Mockito; import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; @@ -73,6 +74,7 @@ import org.opensearch.sql.ast.expression.PatternMode; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -1267,4 +1269,94 @@ public void 
testReplaceCommandWithMultiplePairs() { // Test multiple pattern/replacement pairs plan("source=t | replace 'a' WITH 'A', 'b' WITH 'B' IN field"); } + + @Test + public void testChartCommandBasic() { + assertEqual( + "source=t | chart count() by age", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("age", field("age"))) + .aggregationFunctions(List.of(alias("count()", aggregate("count", AllFields.of())))) + .arguments(emptyList()) + .build()); + } + + @Test + public void testChartCommandWithRowSplit() { + assertEqual( + "source=t | chart count() over status by age", + Chart.builder() + .child(relation("t")) + .rowSplit(alias("status", field("status"))) + .columnSplit(alias("age", field("age"))) + .aggregationFunctions(List.of(alias("count()", aggregate("count", AllFields.of())))) + .arguments(emptyList()) + .build()); + } + + @Test + public void testChartCommandWithMultipleAggregations() { + assertEqual( + "source=t | chart avg(salary), max(age) by department", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("department", field("department"))) + .aggregationFunctions( + List.of( + alias("avg(salary)", aggregate("avg", field("salary"))), + alias("max(age)", aggregate("max", field("age"))))) + .arguments(emptyList()) + .build()); + } + + @Test + public void testChartCommandWithOptions() { + assertEqual( + "source=t | chart limit=10 useother=true count() by status", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("status", field("status"))) + .aggregationFunctions(List.of(alias("count()", aggregate("count", AllFields.of())))) + .arguments( + exprList( + argument("limit", intLiteral(10)), + argument("top", booleanLiteral(true)), + argument("useother", booleanLiteral(true)))) + .build()); + } + + @Test + public void testChartCommandWithAllOptions() { + assertEqual( + "source=t | chart limit=5 useother=false otherstr='OTHER' usenull=true nullstr='NULL'" + + " avg(balance) by gender", + Chart.builder() + .child(relation("t")) + 
.columnSplit(alias("gender", field("gender"))) + .aggregationFunctions( + List.of(alias("avg(balance)", aggregate("avg", field("balance"))))) + .arguments( + exprList( + argument("limit", intLiteral(5)), + argument("top", booleanLiteral(true)), + argument("useother", booleanLiteral(false)), + argument("otherstr", stringLiteral("OTHER")), + argument("usenull", booleanLiteral(true)), + argument("nullstr", stringLiteral("NULL")))) + .build()); + } + + @Test + public void testChartCommandWithBottomLimit() { + assertEqual( + "source=t | chart limit=bottom 3 count() by category", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("category", field("category"))) + .aggregationFunctions(List.of(alias("count()", aggregate("count", AllFields.of())))) + .arguments( + exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) + .build()); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 6b0e0a081f8..5d10960ea6b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -14,6 +14,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.allFields; import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.argument; +import static org.opensearch.sql.ast.dsl.AstDSL.bin; import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.caseWhen; import static org.opensearch.sql.ast.dsl.AstDSL.cast; @@ -1605,4 +1606,66 @@ public void testVisitSpanLiteral() { .useOther(true) .build()); } + + @Test + public void testBinOptionWithSpan() { + assertEqual( + "source=t | bin age span=10", + bin(relation("t"), field("age"), argument("span", intLiteral(10)))); + } + + @Test + public void testBinOptionWithBins() { + 
assertEqual( + "source=t | bin age bins=5", + bin(relation("t"), field("age"), argument("bins", intLiteral(5)))); + } + + @Test + public void testBinOptionWithMinspan() { + assertEqual( + "source=t | bin age minspan=100", + bin(relation("t"), field("age"), argument("minspan", intLiteral(100)))); + } + + @Test + public void testBinOptionWithAligntimeEarliest() { + assertEqual( + "source=t | bin age span=10 aligntime=earliest", + bin( + relation("t"), + field("age"), + argument("span", intLiteral(10)), + argument("aligntime", stringLiteral("earliest")))); + } + + @Test + public void testBinOptionWithAligntimeLiteralValue() { + assertEqual( + "source=t | bin age span=10 aligntime=1000", + bin( + relation("t"), + field("age"), + argument("span", intLiteral(10)), + argument("aligntime", intLiteral(1000)))); + } + + @Test + public void testBinOptionWithStartAndEnd() { + assertEqual( + "source=t | bin age bins=10 start=0 end=100", + bin( + relation("t"), + field("age"), + argument("bins", intLiteral(10)), + argument("start", intLiteral(0)), + argument("end", intLiteral(100)))); + } + + @Test + public void testBinOptionWithTimeSpan() { + assertEqual( + "source=t | bin timestamp span=1h", + bin(relation("t"), field("timestamp"), argument("span", stringLiteral("1h")))); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java index adb9ec719e6..e268656a8d9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java @@ -20,8 +20,11 @@ import static org.opensearch.sql.ast.dsl.AstDSL.sort; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; +import com.google.common.collect.ImmutableList; import org.junit.Test; +import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.tree.Chart; 
import org.opensearch.sql.ppl.parser.AstBuilderTest; public class ArgumentFactoryTest extends AstBuilderTest { @@ -100,6 +103,41 @@ public void testSortFieldArgument() { argument("type", stringLiteral("auto")))))); } + @Test + public void testChartCommandArguments() { + assertEqual( + "source=t | chart limit=5 useother=true otherstr='OTHER_VAL' usenull=false" + + " nullstr='NULL_VAL' count() by age", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("age", field("age"))) + .aggregationFunctions( + ImmutableList.of(alias("count()", aggregate("count", AllFields.of())))) + .arguments( + exprList( + argument("limit", intLiteral(5)), + argument("top", booleanLiteral(true)), + argument("useother", booleanLiteral(true)), + argument("otherstr", stringLiteral("OTHER_VAL")), + argument("usenull", booleanLiteral(false)), + argument("nullstr", stringLiteral("NULL_VAL")))) + .build()); + } + + @Test + public void testChartCommandBottomArguments() { + assertEqual( + "source=t | chart limit=bottom 3 count() by status", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("status", field("status"))) + .aggregationFunctions( + ImmutableList.of(alias("count()", aggregate("count", AllFields.of())))) + .arguments( + exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) + .build()); + } + @Test public void testNoArgConstructorForArgumentFactoryShouldPass() { new ArgumentFactory(); From 585cd5102af7ba13831ba96e3d97c0b908e68eff Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 23 Oct 2025 18:20:12 +0800 Subject: [PATCH 13/23] Add doc for chart command Signed-off-by: Yuanchun Shen --- docs/category.json | 1 + docs/user/ppl/cmd/chart.rst | 194 ++++++++++++++++++++++++++++++++++++ docs/user/ppl/index.rst | 2 + 3 files changed, 197 insertions(+) diff --git a/docs/category.json b/docs/category.json index 49529b08bdc..94b0b180417 100644 --- a/docs/category.json +++ b/docs/category.json @@ -48,6 +48,7 @@ "user/ppl/cmd/stats.rst", 
"user/ppl/cmd/subquery.rst", "user/ppl/cmd/syntax.rst", + "user/ppl/cmd/chart.rst", "user/ppl/cmd/timechart.rst", "user/ppl/cmd/search.rst", "user/ppl/functions/statistical.rst", diff --git a/docs/user/ppl/cmd/chart.rst b/docs/user/ppl/cmd/chart.rst index e69de29bb2d..1633302676a 100644 --- a/docs/user/ppl/cmd/chart.rst +++ b/docs/user/ppl/cmd/chart.rst @@ -0,0 +1,194 @@ +============= +chart +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ + +The ``chart`` command transforms search results by applying a statistical aggregation function and optionally grouping the data by one or two fields. The results are suitable for visualization as a two-dimensional chart when grouping by two fields, where unique values in the second group key can be pivoted to column names. + +Version +======= +3.4.0 + +Syntax +============ + +.. code-block:: text + + chart + [limit=(top|bottom) <number>] [useother=<bool>] [usenull=<bool>] [nullstr=<string>] [otherstr=<string>] + <aggregation_function> + [ by <row_split> <column_split> ] | [over <row_split> ] [ by <column_split>] + +**Parameters:** + +* **limit**: optional. Specifies the number of distinct values to display when using column split. + + * Default: 10 + * Syntax: ``limit=(top|bottom) <number>`` or ``limit=<number>`` (defaults to top) + * When there are more distinct values than the limit, the additional values are grouped into an "OTHER" category if useother is not set to false. + * Set to 0 to show all distinct values without any limit. + * Only applies when using column split (over...by clause). + +* **useother**: optional. Controls whether to create an "OTHER" category for values beyond the limit. + + * Default: true + * When set to false, only the top/bottom N values (based on limit) are shown without an "OTHER" category. + * When set to true, values beyond the limit are grouped into an "OTHER" category. + * Only applies when using column split and when there are more distinct values than the limit. + +* **usenull**: optional.
Controls whether to include null values as a separate category. + + * Default: true + * When set to false, events with null values in the split-by field are excluded from results. + * When set to true, null values appear as a separate category. + +* **nullstr**: optional. Specifies the string to display for null values. + + * Default: "NULL" + * Only applies when usenull is set to true. + +* **otherstr**: optional. Specifies the string to display for the "OTHER" category. + + * Default: "OTHER" + * Only applies when useother is set to true and there are values beyond the limit. + +* **aggregation_function**: mandatory. The aggregation function to apply to the data. + + * Currently, only a single aggregation function is supported. + * Available functions: All aggregation functions supported by the :doc:`stats <stats>` command. + +* **by**: optional. Groups the results by the specified field as rows. + + * If not specified, the aggregation is performed across all documents. + +* **over...by**: optional. Alternative syntax for grouping by multiple fields. + + * ``over <row_split> by <column_split>`` groups the results by both fields. + * The row_split field becomes the primary grouping dimension. + * The column_split field becomes the secondary grouping dimension. + * Results are returned as individual rows for each combination. + +Notes +===== + +* The ``chart`` command transforms results into a table format suitable for visualization. +* When using multiple grouping fields (over...by syntax), the output contains individual rows for each combination of the grouping fields. +* The limit parameter determines how many columns to show when there are many distinct values. +* Results are ordered by the aggregated values to determine top/bottom selections. + +Examples +======== + +Example 1: Basic aggregation without grouping +============================================== + +This example calculates the average balance across all accounts.
+ +PPL query:: + + os> source=accounts | chart avg(balance) + fetched rows / total rows = 1/1 + +--------------+ + | avg(balance) | + |--------------| + | 20482.25 | + +--------------+ + +Example 2: Group by single field +================================= + +This example calculates the count of accounts grouped by gender. + +PPL query:: + + os> source=accounts | chart count() by gender + fetched rows / total rows = 2/2 + +---------+--------+ + | count() | gender | + |---------+--------| + | 1 | F | + | 3 | M | + +---------+--------+ + +Example 3: Using over and by for multiple field grouping +======================================================== + +This example shows average balance grouped by both gender and age fields. + +PPL query:: + + os> source=accounts | chart avg(balance) over gender by age + fetched rows / total rows = 4/4 + +--------+-----+--------------+ + | gender | age | avg(balance) | + |--------+-----+--------------| + | F | 28 | 32838.0 | + | M | 32 | 39225.0 | + | M | 33 | 4180.0 | + | M | 36 | 5686.0 | + +--------+-----+--------------+ + +Example 4: Using basic limit functionality +======================================== + +This example limits the results to show only the top 1 age group. + +PPL query:: + + os> source=accounts | chart limit=1 count() over gender by age + fetched rows / total rows = 3/3 + +--------+-------+---------+ + | gender | age | count() | + |--------+-------+---------| + | M | OTHER | 2 | + | M | 33 | 1 | + | F | OTHER | 1 | + +--------+-------+---------+ + +Example 5: Using limit with other parameters +============================================= + +This example shows using limit with useother and custom otherstr parameters. 
+ +PPL query:: + + os> source=accounts | chart limit=top 2 useother=true otherstr='remaining_accounts' max(balance) over state by gender + fetched rows / total rows = 4/4 + +-------+--------+--------------+ + | state | gender | max(balance) | + |-------+--------+--------------| + | TN | M | 5686 | + | MD | M | 4180 | + | IL | M | 39225 | + | VA | F | 32838 | + +-------+--------+--------------+ + +Example 6: Using span with chart command +======================================= + +This example demonstrates using span for grouping age ranges. + +PPL query:: + + os> source=accounts | chart max(balance) by age span=10 + fetched rows / total rows = 2/2 + +--------------+-----+ + | max(balance) | age | + |--------------+-----| + | 32838 | 20 | + | 39225 | 30 | + +--------------+-----+ + +Limitations +============ +* Only a single aggregation function is supported per chart command. +* When using both row and column splits, the column split field is converted to string type so that it can be used as column names. 
\ No newline at end of file diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 17b4797df39..6792c161ed0 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -117,6 +117,8 @@ The query start with search command and then flowing a set of command delimited - `reverse command <cmd/reverse.rst>`_ - `table command <cmd/table.rst>`_ + + - `chart command <cmd/chart.rst>`_ - `timechart command <cmd/timechart.rst>`_ From efeb8e2dd14e1e54c3a99e61f6a64d31c1ef1356 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 24 Oct 2025 10:36:26 +0800 Subject: [PATCH 14/23] Prompt users that multiple agg is not supported Signed-off-by: Yuanchun Shen --- .../opensearch/sql/ppl/parser/AstBuilder.java | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 8d2d5b63283..654381be0f1 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -407,16 +407,7 @@ private ReplacePair buildReplacePair(OpenSearchPPLParser.ReplacePairContext ctx) /** Stats command. */ @Override public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { - ImmutableList.Builder aggListBuilder = new ImmutableList.Builder<>(); - for (OpenSearchPPLParser.StatsAggTermContext aggCtx : ctx.statsAggTerm()) { - UnresolvedExpression aggExpression = internalVisitExpression(aggCtx.statsFunction()); - String name = - aggCtx.alias == null - ?
getTextInQuery(aggCtx) - : StringUtils.unquoteIdentifier(aggCtx.alias.getText()); - Alias alias = new Alias(name, aggExpression); - aggListBuilder.add(alias); - } + List aggregations = parseAggTerms(ctx.statsAggTerm()); List groupList = Optional.ofNullable(ctx.statsByClause()) @@ -441,7 +432,7 @@ public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { Aggregation aggregation = new Aggregation( - aggListBuilder.build(), + aggregations, Collections.emptyList(), groupList, span, @@ -614,8 +605,23 @@ public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext UnresolvedExpression columnSplit = ctx.columnSplit() == null ? null : internalVisitExpression(ctx.columnSplit()); List arguments = ArgumentFactory.getArgumentList(ctx); + List aggList = parseAggTerms(ctx.statsAggTerm()); + if (aggList.size() > 1) { + throw new IllegalArgumentException( + "Chart command does not support multiple aggregation functions yet"); + } + return Chart.builder() + .rowSplit(rowSplit) + .columnSplit(columnSplit) + .aggregationFunctions(aggList) + .arguments(arguments) + .build(); + } + + private List parseAggTerms( + List statsAggTermContexts) { ImmutableList.Builder aggListBuilder = new ImmutableList.Builder<>(); - for (OpenSearchPPLParser.StatsAggTermContext aggCtx : ctx.statsAggTerm()) { + for (OpenSearchPPLParser.StatsAggTermContext aggCtx : statsAggTermContexts) { UnresolvedExpression aggExpression = internalVisitExpression(aggCtx.statsFunction()); String name = aggCtx.alias == null @@ -624,12 +630,7 @@ public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext Alias alias = new Alias(name, aggExpression); aggListBuilder.add(alias); } - return Chart.builder() - .rowSplit(rowSplit) - .columnSplit(columnSplit) - .aggregationFunctions(aggListBuilder.build()) - .arguments(arguments) - .build(); + return aggListBuilder.build(); } /** Timechart command. 
*/ From 6b8934e10e137f6db487a0f703ba327bff8ca25f Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 24 Oct 2025 14:05:37 +0800 Subject: [PATCH 15/23] Add explain ITs Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteNoPushdownIT.java | 165 +++++++++--------- .../sql/calcite/remote/CalciteExplainIT.java | 62 +++++++ .../explain_chart_multiple_group_keys.yaml | 32 ++++ .../calcite/explain_chart_null_str.yaml | 36 ++++ .../explain_chart_single_group_key.yaml | 9 + .../calcite/explain_chart_timestamp_span.yaml | 26 +++ .../calcite/explain_chart_use_other.yaml | 26 +++ .../calcite/explain_chart_with_limit.yaml | 9 + .../calcite/explain_chart_with_span.yaml | 9 + .../explain_chart_multiple_group_keys.yaml | 35 ++++ .../explain_chart_multiple_groups.yaml | 35 ++++ .../explain_chart_null_str.yaml | 37 ++++ .../explain_chart_single_group.yaml | 13 ++ .../explain_chart_single_group_key.yaml | 13 ++ .../explain_chart_timestamp_span.yaml | 32 ++++ .../explain_chart_use_other.yaml | 30 ++++ .../explain_chart_with_limit.yaml | 13 ++ .../explain_chart_with_span.yaml | 14 ++ 18 files changed, 513 insertions(+), 83 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml create 
mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_groups.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 69507c71aa5..14e2bfdb4da 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -10,7 +10,6 @@ import org.junit.runner.RunWith; import org.junit.runners.Suite; import org.opensearch.sql.calcite.remote.*; -import org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT; import org.opensearch.sql.ppl.PPLIntegTestCase; /** @@ -21,88 +20,88 @@ @RunWith(Suite.class) @Suite.SuiteClasses({ CalciteExplainIT.class, - CalciteArrayFunctionIT.class, - CalciteBinCommandIT.class, - CalciteConvertTZFunctionIT.class, - CalciteCsvFormatIT.class, - CalciteDataTypeIT.class, - CalciteDateTimeComparisonIT.class, - CalciteDateTimeFunctionIT.class, - CalciteDateTimeImplementationIT.class, - CalciteDedupCommandIT.class, - CalciteDescribeCommandIT.class, - CalciteExpandCommandIT.class, - 
CalciteFieldsCommandIT.class, - CalciteFillNullCommandIT.class, - CalciteFlattenCommandIT.class, - CalciteFlattenDocValueIT.class, - CalciteGeoIpFunctionsIT.class, - CalciteGeoPointFormatsIT.class, - CalciteHeadCommandIT.class, - CalciteInformationSchemaCommandIT.class, - CalciteIPComparisonIT.class, - CalciteIPFunctionsIT.class, - CalciteJsonFunctionsIT.class, - CalciteLegacyAPICompatibilityIT.class, - CalciteLikeQueryIT.class, - CalciteMathematicalFunctionIT.class, - CalciteMultisearchCommandIT.class, - CalciteMultiValueStatsIT.class, - CalciteNewAddedCommandsIT.class, - CalciteNowLikeFunctionIT.class, - CalciteObjectFieldOperateIT.class, - CalciteOperatorIT.class, - CalciteParseCommandIT.class, - CalcitePPLAggregationIT.class, - CalcitePPLAppendcolIT.class, - CalcitePPLAppendCommandIT.class, - CalcitePPLBasicIT.class, - CalcitePPLBuiltinDatetimeFunctionInvalidIT.class, - CalcitePPLBuiltinFunctionIT.class, - CalcitePPLBuiltinFunctionsNullIT.class, - CalcitePPLCaseFunctionIT.class, - CalcitePPLCastFunctionIT.class, - CalcitePPLConditionBuiltinFunctionIT.class, - CalcitePPLCryptographicFunctionIT.class, - CalcitePPLDedupIT.class, - CalcitePPLEventstatsIT.class, - CalcitePPLExistsSubqueryIT.class, - CalcitePPLExplainIT.class, - CalcitePPLFillnullIT.class, - CalcitePPLGrokIT.class, - CalcitePPLInSubqueryIT.class, - CalcitePPLIPFunctionIT.class, - CalcitePPLJoinIT.class, - CalcitePPLJsonBuiltinFunctionIT.class, - CalcitePPLLookupIT.class, - CalcitePPLParseIT.class, - CalcitePPLPatternsIT.class, - CalcitePPLPluginIT.class, - CalcitePPLRenameIT.class, - CalcitePPLScalarSubqueryIT.class, - CalcitePPLSortIT.class, - CalcitePPLStringBuiltinFunctionIT.class, - CalcitePPLTrendlineIT.class, - CalcitePrometheusDataSourceCommandsIT.class, - CalciteQueryAnalysisIT.class, - CalciteRareCommandIT.class, - CalciteRegexCommandIT.class, - CalciteRexCommandIT.class, - CalciteRenameCommandIT.class, - CalciteReplaceCommandIT.class, - CalciteResourceMonitorIT.class, - 
CalciteSearchCommandIT.class, - CalciteSettingsIT.class, - CalciteShowDataSourcesCommandIT.class, - CalciteSortCommandIT.class, - CalciteStatsCommandIT.class, - CalciteSystemFunctionIT.class, - CalciteTextFunctionIT.class, - CalciteTopCommandIT.class, - CalciteTrendlineCommandIT.class, - CalciteVisualizationFormatIT.class, - CalciteWhereCommandIT.class, - CalcitePPLTpchIT.class + // CalciteArrayFunctionIT.class, + // CalciteBinCommandIT.class, + // CalciteConvertTZFunctionIT.class, + // CalciteCsvFormatIT.class, + // CalciteDataTypeIT.class, + // CalciteDateTimeComparisonIT.class, + // CalciteDateTimeFunctionIT.class, + // CalciteDateTimeImplementationIT.class, + // CalciteDedupCommandIT.class, + // CalciteDescribeCommandIT.class, + // CalciteExpandCommandIT.class, + // CalciteFieldsCommandIT.class, + // CalciteFillNullCommandIT.class, + // CalciteFlattenCommandIT.class, + // CalciteFlattenDocValueIT.class, + // CalciteGeoIpFunctionsIT.class, + // CalciteGeoPointFormatsIT.class, + // CalciteHeadCommandIT.class, + // CalciteInformationSchemaCommandIT.class, + // CalciteIPComparisonIT.class, + // CalciteIPFunctionsIT.class, + // CalciteJsonFunctionsIT.class, + // CalciteLegacyAPICompatibilityIT.class, + // CalciteLikeQueryIT.class, + // CalciteMathematicalFunctionIT.class, + // CalciteMultisearchCommandIT.class, + // CalciteMultiValueStatsIT.class, + // CalciteNewAddedCommandsIT.class, + // CalciteNowLikeFunctionIT.class, + // CalciteObjectFieldOperateIT.class, + // CalciteOperatorIT.class, + // CalciteParseCommandIT.class, + // CalcitePPLAggregationIT.class, + // CalcitePPLAppendcolIT.class, + // CalcitePPLAppendCommandIT.class, + // CalcitePPLBasicIT.class, + // CalcitePPLBuiltinDatetimeFunctionInvalidIT.class, + // CalcitePPLBuiltinFunctionIT.class, + // CalcitePPLBuiltinFunctionsNullIT.class, + // CalcitePPLCaseFunctionIT.class, + // CalcitePPLCastFunctionIT.class, + // CalcitePPLConditionBuiltinFunctionIT.class, + // CalcitePPLCryptographicFunctionIT.class, + // 
CalcitePPLDedupIT.class, + // CalcitePPLEventstatsIT.class, + // CalcitePPLExistsSubqueryIT.class, + // CalcitePPLExplainIT.class, + // CalcitePPLFillnullIT.class, + // CalcitePPLGrokIT.class, + // CalcitePPLInSubqueryIT.class, + // CalcitePPLIPFunctionIT.class, + // CalcitePPLJoinIT.class, + // CalcitePPLJsonBuiltinFunctionIT.class, + // CalcitePPLLookupIT.class, + // CalcitePPLParseIT.class, + // CalcitePPLPatternsIT.class, + // CalcitePPLPluginIT.class, + // CalcitePPLRenameIT.class, + // CalcitePPLScalarSubqueryIT.class, + // CalcitePPLSortIT.class, + // CalcitePPLStringBuiltinFunctionIT.class, + // CalcitePPLTrendlineIT.class, + // CalcitePrometheusDataSourceCommandsIT.class, + // CalciteQueryAnalysisIT.class, + // CalciteRareCommandIT.class, + // CalciteRegexCommandIT.class, + // CalciteRexCommandIT.class, + // CalciteRenameCommandIT.class, + // CalciteReplaceCommandIT.class, + // CalciteResourceMonitorIT.class, + // CalciteSearchCommandIT.class, + // CalciteSettingsIT.class, + // CalciteShowDataSourcesCommandIT.class, + // CalciteSortCommandIT.class, + // CalciteStatsCommandIT.class, + // CalciteSystemFunctionIT.class, + // CalciteTextFunctionIT.class, + // CalciteTopCommandIT.class, + // CalciteTrendlineCommandIT.class, + // CalciteVisualizationFormatIT.class, + // CalciteWhereCommandIT.class, + // CalcitePPLTpchIT.class }) public class CalciteNoPushdownIT { private static boolean wasPushdownEnabled; diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index fe3e5e9224b..fbabfdb0e04 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -7,8 +7,10 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static 
org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; @@ -31,6 +33,7 @@ public void init() throws Exception { enableCalcite(); setQueryBucketSize(1000); loadIndex(Index.BANK_WITH_STRING_VALUES); + loadIndex(Index.BANK_WITH_NULL_VALUES); loadIndex(Index.NESTED_SIMPLE); loadIndex(Index.TIME_TEST_DATA); loadIndex(Index.TIME_TEST_DATA2); @@ -1153,6 +1156,65 @@ public void testPushDownMinOrMaxAggOnDerivedField() throws IOException { TEST_INDEX_ACCOUNT))); } + @Test + public void testExplainChartWithSingleGroupKey() throws IOException { + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_chart_single_group_key.yaml"), + explainQueryYaml( + String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_chart_with_span.yaml"), + explainQueryYaml( + String.format("source=%s | chart max(balance) by age span=10", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_chart_timestamp_span.yaml"), + explainQueryYaml( + String.format( + "source=%s | chart max(value) over timestamp span=1week by category", + TEST_INDEX_TIME_DATA))); + } + + @Test + public void testExplainChartWithMultipleGroupKeys() throws IOException { + String expected = loadExpectedPlan("explain_chart_multiple_group_keys.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format("source=%s | chart avg(balance) over gender by age", TEST_INDEX_BANK))); + } + + @Test + public void testExplainChartWithLimits() throws 
IOException { + String expected = loadExpectedPlan("explain_chart_with_limit.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | chart limit=0 avg(balance) over state by gender", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("explain_chart_use_other.yaml"), + explainQueryYaml( + String.format( + "source=%s | chart limit=2 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS))); + } + + @Test + public void testExplainChartWithNullStr() throws IOException { + String expected = loadExpectedPlan("explain_chart_null_str.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | chart limit=10 usenull=true nullstr='nil' avg(balance) over gender by" + + " age span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES))); + } + @Test public void testCasePushdownAsRangeQueryExplain() throws IOException { // CASE 1: Range - Metric diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml new file mode 100644 index 00000000000..b4419f38e11 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml @@ -0,0 +1,32 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) + LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], 
__row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) + LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[SAFE_CAST($t1)], avg(balance)=[$t2], gender=[$t0], age=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs 
[ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[SAFE_CAST($t1)], avg(balance)=[$t0], $f1=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml new file mode 100644 index 00000000000..6a3a024b2b3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml @@ -0,0 +1,36 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) + LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'nil', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank_with_null_values]]) + LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) + LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[gender, balance, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","balance","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + 
EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[gender, balance, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","balance","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml new file mode 100644 index 00000000000..3752736138f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), PROJECT->[avg(balance), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml new file mode 100644 index 00000000000..9007f4da716 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml @@ -0,0 +1,26 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], max(value)=[MAX($0)]) + LogicalProject(max(value)=[$0], timestamp=[$1], category=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(max(value)=[$2], timestamp=[$1], category=[$0]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + LogicalProject(category=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[MAX($0)]) + LogicalProject(max(value)=[$2], category=[$0]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{1, 2}], max(value)=[MAX($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], category=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[max(value), timestamp0, category], SORT->[2]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},__grand_total__=MAX($1))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"__grand_total__":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff 
--git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml new file mode 100644 index 00000000000..d6e799c3c36 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml @@ -0,0 +1,26 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], max(severityNumber)=[MAX($0)]) + LogicalProject(max(severityNumber)=[$0], flags=[$1], severityText=[CASE(IS NULL($2), 'NULL', <=($5, 2), $2, 'max_among_other')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(max(severityNumber)=[$2], flags=[$0], severityText=[$1]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalProject(severityText=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[MAX($0)]) + LogicalProject(max(severityNumber)=[$2], severityText=[$1]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{1, 2}], max(severityNumber)=[MAX($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], severityText=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},max(severityNumber)=MAX($2)), PROJECT->[max(severityNumber), flags, severityText], SORT->[2]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"flags":{"terms":{"field":"flags","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"max(severityNumber)":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], severityText=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},__grand_total__=MAX($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"__grand_total__":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml new file mode 100644 index 00000000000..3077f16152c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$2], 
state=[$0], gender=[$1]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), state, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml new file mode 100644 index 00000000000..b6af45e0974 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(max(balance)=[$1], age=[$0]) + LogicalAggregate(group=[{1}], max(balance)=[MAX($0)]) + LogicalProject(balance=[$7], age0=[SPAN($10, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},max(balance)=MAX($0)), PROJECT->[max(balance), age0], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age0":{"histogram":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc","interval":10.0}}}]},"aggregations":{"max(balance)":{"max":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml new file mode 100644 index 00000000000..11e45e502bf --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml @@ -0,0 +1,35 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) + LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) + LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], 
proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_groups.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_groups.yaml new file mode 100644 index 00000000000..5d641cb3929 --- /dev/null 
+++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_groups.yaml @@ -0,0 +1,35 @@ +logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) + LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) + LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + +physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + 
EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml new file mode 100644 index 00000000000..0c34016836b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml @@ -0,0 +1,37 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) + LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'nil', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank_with_null_values]]) + LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) + LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[10], expr#14=[null:NULL], expr#15=[SPAN($t5, $t13, $t14)], gender=[$t4], balance=[$t3], age0=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], 
expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[10], expr#14=[null:NULL], expr#15=[SPAN($t5, $t13, $t14)], gender=[$t4], balance=[$t3], age0=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group.yaml new file mode 100644 index 00000000000..0cf28205b97 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group.yaml @@ -0,0 +1,13 @@ +logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + +physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(balance)=[$t8], gender=[$t0]) + EnumerableAggregate(group=[{4}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml new file mode 100644 index 00000000000..b9e6ff9f735 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(balance)=[$t8], gender=[$t0]) + EnumerableAggregate(group=[{4}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml new file mode 100644 index 00000000000..913fd20b8bc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml @@ -0,0 +1,32 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], max(value)=[MAX($0)]) + LogicalProject(max(value)=[$0], timestamp=[$1], category=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(max(value)=[$2], timestamp=[$1], category=[$0]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_time_data]]) + LogicalProject(category=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[MAX($0)]) + LogicalProject(max(value)=[$2], category=[$0]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{1, 2}], max(value)=[MAX($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], category=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], max(value)=[$t2], timestamp=[$t1], category=[$t0]) + EnumerableAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[1], expr#11=['w'], expr#12=[SPAN($t3, $t10, $t11)], category=[$t1], value=[$t2], timestamp0=[$t12]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{1}], __grand_total__=[MAX($2)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml new file mode 100644 index 00000000000..022072f3ae7 --- /dev/null +++ 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml @@ -0,0 +1,30 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{1, 2}], max(severityNumber)=[MAX($0)]) + LogicalProject(max(severityNumber)=[$0], flags=[$1], severityText=[CASE(IS NULL($2), 'NULL', <=($5, 2), $2, 'max_among_other')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(max(severityNumber)=[$2], flags=[$0], severityText=[$1]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalProject(severityText=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[MAX($0)]) + LogicalProject(max(severityNumber)=[$2], severityText=[$1]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{1, 2}], max(severityNumber)=[MAX($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], severityText=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], max(severityNumber)=[$t2], flags=[$t1], severityText=[$t0]) + EnumerableAggregate(group=[{7, 23}], max(severityNumber)=[MAX($163)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], 
severityText=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{7}], __grand_total__=[MAX($163)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml new file mode 100644 index 00000000000..e4ff0a172ce --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$2], state=[$0], gender=[$1]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg(balance)=[$t9], state=[$t1], gender=[$t0]) + EnumerableAggregate(group=[{4, 9}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml new file mode 100644 index 00000000000..6e8f8777170 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(max(balance)=[$1], age=[$0]) + 
LogicalAggregate(group=[{1}], max(balance)=[MAX($0)]) + LogicalProject(balance=[$7], age0=[SPAN($10, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], max(balance)=[$t1], age=[$t0]) + EnumerableAggregate(group=[{1}], max(balance)=[MAX($0)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[10], expr#20=[null:NULL], expr#21=[SPAN($t10, $t19, $t20)], balance=[$t7], age0=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + From df809bd6a6b8e3c7db2f7e7f7f7bb34d08d9802d Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 24 Oct 2025 14:50:11 +0800 Subject: [PATCH 16/23] Remove unimplemented support for multiple aggregations in chart command Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/ast/tree/Chart.java | 2 +- .../sql/calcite/CalciteRelNodeVisitor.java | 58 +++++++++++-------- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 4 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 8 +-- .../sql/ppl/parser/AstBuilderTest.java | 26 ++------- .../sql/ppl/utils/ArgumentFactoryTest.java | 7 +-- 6 files changed, 46 insertions(+), 59 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java index 02e0878e12d..d0f982edce6 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java @@ -34,7 +34,7 @@ public class Chart extends UnresolvedPlan { private UnresolvedPlan child; private UnresolvedExpression rowSplit; private UnresolvedExpression columnSplit; - private List aggregationFunctions; + private UnresolvedExpression aggregationFunction; private List arguments; @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 
0c011352539..35bd3ae0151 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2018,30 +2018,31 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { ArgumentMap argMap = ArgumentMap.of(node.getArguments()); List groupExprList = Stream.of(node.getRowSplit(), node.getColumnSplit()).filter(Objects::nonNull).toList(); - Boolean useNull = (Boolean) argMap.getOrDefault("usenull", Chart.DEFAULT_USE_NULL).getValue(); + ChartConfig config = ChartConfig.fromArguments(argMap); Aggregation aggregation = new Aggregation( - node.getAggregationFunctions(), + List.of(node.getAggregationFunction()), List.of(), groupExprList, null, - List.of(new Argument(Argument.BUCKET_NULLABLE, AstDSL.booleanLiteral(useNull)))); + List.of(new Argument(Argument.BUCKET_NULLABLE, AstDSL.booleanLiteral(config.useNull)))); RelNode aggregated = visitAggregation(aggregation, context); // If row or column split does not present or limit equals 0, this is the same as `stats agg - // [group by col]` + // [group by col]` because all truncating is performed on the column split Integer limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); if (node.getRowSplit() == null || node.getColumnSplit() == null || Objects.equals(limit, 0)) { return aggregated; } - String aggFunctionName = getAggFunctionName(node.getAggregationFunctions().getFirst()); - Optional aggFuncNameOptional = BuiltinFunctionName.of(aggFunctionName); - if (aggFuncNameOptional.isEmpty()) { - throw new IllegalArgumentException( - StringUtils.format("Unrecognized aggregation function: %s", aggFunctionName)); - } - BuiltinFunctionName aggFunction = aggFuncNameOptional.get(); + String aggFunctionName = getAggFunctionName(node.getAggregationFunction()); + BuiltinFunctionName aggFunction = + BuiltinFunctionName.of(aggFunctionName) + .orElseThrow( + () -> + new IllegalArgumentException( + 
StringUtils.format( + "Unrecognized aggregation function: %s", aggFunctionName))); // Convert the column split to string if necessary: column split was supposed to be pivoted to // column names. This guarantees that its type compatibility with useother and usenull @@ -2058,12 +2059,6 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.project(relBuilder.field(0), relBuilder.field(1), colSplit); aggregated = relBuilder.peek(); - Boolean top = (Boolean) argMap.getOrDefault("top", Chart.DEFAULT_TOP).getValue(); - Boolean useOther = - (Boolean) argMap.getOrDefault("useother", Chart.DEFAULT_USE_OTHER).getValue(); - String otherStr = (String) argMap.getOrDefault("otherstr", Chart.DEFAULT_OTHER_STR).getValue(); - String nullStr = (String) argMap.getOrDefault("nullstr", Chart.DEFAULT_NULL_STR).getValue(); - // 0: agg; 2: column-split relBuilder.project(relBuilder.field(0), relBuilder.field(2)); // 1: column split; 0: agg @@ -2075,7 +2070,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // Apply sorting: for MIN/EARLIEST, reverse the top/bottom logic boolean smallestFirst = aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST; - if (top != smallestFirst) { + if (config.top != smallestFirst) { grandTotal = relBuilder.desc(grandTotal); } @@ -2108,26 +2103,26 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.literal(limit)); RexNode nullCondition = relBuilder.isNull(colSplitPostJoin); RexNode columnSplitExpr; - if (!useOther) { + if (!config.useOther) { relBuilder.filter(lteCondition); } - if (useNull) { + if (config.useNull) { columnSplitExpr = relBuilder.call( SqlStdOperatorTable.CASE, nullCondition, - relBuilder.literal(nullStr), + relBuilder.literal(config.nullStr), lteCondition, relBuilder.field(2), - relBuilder.literal(otherStr)); + relBuilder.literal(config.otherStr)); } else { columnSplitExpr = relBuilder.call( SqlStdOperatorTable.CASE, lteCondition, 
relBuilder.field(2), - relBuilder.literal(otherStr)); + relBuilder.literal(config.otherStr)); } String aggFieldName = relBuilder.peek().getRowType().getFieldNames().getFirst(); @@ -2141,6 +2136,21 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { return relBuilder.peek(); } + private record ChartConfig( + int limit, boolean top, boolean useOther, boolean useNull, String otherStr, String nullStr) { + static ChartConfig fromArguments(ArgumentMap argMap) { + int limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); + boolean top = (Boolean) argMap.getOrDefault("top", Chart.DEFAULT_TOP).getValue(); + boolean useOther = + (Boolean) argMap.getOrDefault("useother", Chart.DEFAULT_USE_OTHER).getValue(); + boolean useNull = (Boolean) argMap.getOrDefault("usenull", Chart.DEFAULT_USE_NULL).getValue(); + String otherStr = + (String) argMap.getOrDefault("otherstr", Chart.DEFAULT_OTHER_STR).getValue(); + String nullStr = (String) argMap.getOrDefault("nullstr", Chart.DEFAULT_NULL_STR).getValue(); + return new ChartConfig(limit, top, useOther, useNull, otherStr, nullStr); + } + } + /** Transforms timechart command into SQL-based operations. */ @Override public RelNode visitTimechart( @@ -2150,7 +2160,7 @@ public RelNode visitTimechart( // Extract parameters UnresolvedExpression spanExpr = node.getBinExpression(); - List groupExprList = Arrays.asList(spanExpr); + List groupExprList; // Handle no by field case if (node.getByField() == null) { diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 77316e00c98..bcc7c285162 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -260,8 +260,8 @@ reverseCommand ; chartCommand - : CHART chartOptions* statsAggTerm (COMMA statsAggTerm)* (OVER rowSplit)? (BY columnSplit)? - | CHART chartOptions* statsAggTerm (COMMA statsAggTerm)* BY rowSplit (COMMA)? 
columnSplit + : CHART chartOptions* statsAggTerm (OVER rowSplit)? (BY columnSplit)? + | CHART chartOptions* statsAggTerm BY rowSplit (COMMA)? columnSplit ; chartOptions diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 654381be0f1..4c7b38704cc 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -605,15 +605,11 @@ public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext UnresolvedExpression columnSplit = ctx.columnSplit() == null ? null : internalVisitExpression(ctx.columnSplit()); List arguments = ArgumentFactory.getArgumentList(ctx); - List aggList = parseAggTerms(ctx.statsAggTerm()); - if (aggList.size() > 1) { - throw new IllegalArgumentException( - "Chart command does not support multiple aggregation functions yet"); - } + UnresolvedExpression aggFunction = parseAggTerms(List.of(ctx.statsAggTerm())).getFirst(); return Chart.builder() .rowSplit(rowSplit) .columnSplit(columnSplit) - .aggregationFunctions(aggList) + .aggregationFunction(aggFunction) .arguments(arguments) .build(); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index a3cca06fbc7..d3584fd39ce 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1277,7 +1277,7 @@ public void testChartCommandBasic() { Chart.builder() .child(relation("t")) .columnSplit(alias("age", field("age"))) - .aggregationFunctions(List.of(alias("count()", aggregate("count", AllFields.of())))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) .arguments(emptyList()) .build()); } @@ -1290,22 +1290,7 @@ public void testChartCommandWithRowSplit() { .child(relation("t")) 
.rowSplit(alias("status", field("status"))) .columnSplit(alias("age", field("age"))) - .aggregationFunctions(List.of(alias("count()", aggregate("count", AllFields.of())))) - .arguments(emptyList()) - .build()); - } - - @Test - public void testChartCommandWithMultipleAggregations() { - assertEqual( - "source=t | chart avg(salary), max(age) by department", - Chart.builder() - .child(relation("t")) - .columnSplit(alias("department", field("department"))) - .aggregationFunctions( - List.of( - alias("avg(salary)", aggregate("avg", field("salary"))), - alias("max(age)", aggregate("max", field("age"))))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) .arguments(emptyList()) .build()); } @@ -1317,7 +1302,7 @@ public void testChartCommandWithOptions() { Chart.builder() .child(relation("t")) .columnSplit(alias("status", field("status"))) - .aggregationFunctions(List.of(alias("count()", aggregate("count", AllFields.of())))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) .arguments( exprList( argument("limit", intLiteral(10)), @@ -1334,8 +1319,7 @@ public void testChartCommandWithAllOptions() { Chart.builder() .child(relation("t")) .columnSplit(alias("gender", field("gender"))) - .aggregationFunctions( - List.of(alias("avg(balance)", aggregate("avg", field("balance"))))) + .aggregationFunction(alias("avg(balance)", aggregate("avg", field("balance")))) .arguments( exprList( argument("limit", intLiteral(5)), @@ -1354,7 +1338,7 @@ public void testChartCommandWithBottomLimit() { Chart.builder() .child(relation("t")) .columnSplit(alias("category", field("category"))) - .aggregationFunctions(List.of(alias("count()", aggregate("count", AllFields.of())))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) .arguments( exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) .build()); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java 
b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java index e268656a8d9..f5b389146c7 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java @@ -20,7 +20,6 @@ import static org.opensearch.sql.ast.dsl.AstDSL.sort; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; -import com.google.common.collect.ImmutableList; import org.junit.Test; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.Argument; @@ -111,8 +110,7 @@ public void testChartCommandArguments() { Chart.builder() .child(relation("t")) .columnSplit(alias("age", field("age"))) - .aggregationFunctions( - ImmutableList.of(alias("count()", aggregate("count", AllFields.of())))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) .arguments( exprList( argument("limit", intLiteral(5)), @@ -131,8 +129,7 @@ public void testChartCommandBottomArguments() { Chart.builder() .child(relation("t")) .columnSplit(alias("status", field("status"))) - .aggregationFunctions( - ImmutableList.of(alias("count()", aggregate("count", AllFields.of())))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) .arguments( exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) .build()); From dabb71074a115f613d8e1f069007ca480655d5d0 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 28 Oct 2025 20:03:10 +0800 Subject: [PATCH 17/23] Add unit tests for chart command Signed-off-by: Yuanchun Shen --- .../sql/ppl/calcite/CalcitePPLChartTest.java | 381 ++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java new file mode 100644 index 
00000000000..c49f0344900 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java @@ -0,0 +1,381 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; +import org.opensearch.sql.ppl.parser.AstBuilder; + +public class CalcitePPLChartTest extends CalcitePPLAbstractTest { + + public CalcitePPLChartTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... 
schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Add events table for chart tests - similar to bank data used in integration tests + ImmutableList rows = + ImmutableList.of( + new Object[] {32838, "F", 28, "VA", java.sql.Timestamp.valueOf("2024-07-01 00:00:00")}, + new Object[] {40540, "F", 39, "PA", java.sql.Timestamp.valueOf("2024-07-01 00:01:00")}, + new Object[] {39225, "M", 32, "IL", java.sql.Timestamp.valueOf("2024-07-01 00:02:00")}, + new Object[] {4180, "M", 33, "MD", java.sql.Timestamp.valueOf("2024-07-01 00:03:00")}, + new Object[] {11052, "M", 36, "WA", java.sql.Timestamp.valueOf("2024-07-01 00:04:00")}, + new Object[] {48086, "F", 34, "IN", java.sql.Timestamp.valueOf("2024-07-01 00:05:00")}); + schema.add("bank", new BankTable(rows)); + + // Add time_data table for span tests + ImmutableList timeRows = + ImmutableList.of( + new Object[] {java.sql.Timestamp.valueOf("2025-07-28 00:00:00"), "A", 9367}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-29 00:00:00"), "B", 9521}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-30 00:00:00"), "C", 9187}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-31 00:00:00"), "D", 8736}, + new Object[] {java.sql.Timestamp.valueOf("2025-08-01 00:00:00"), "A", 9015}); + schema.add("time_data", new TimeDataTable(timeRows)); + + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testChartWithSingleGroupKey() { + String ppl = "source=bank | chart avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void 
testChartWithOverSyntax() { + String ppl = "source=bank | chart avg(balance) over gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithMultipleGroupKeys() { + String ppl = "source=bank | chart avg(balance) over gender by age"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN `t6`.`__row_number__`" + + " <= 10 THEN `t1`.`age` ELSE 'OTHER' END `age`, AVG(`t1`.`avg(balance)`)" + + " `avg(balance)`\n" + + "FROM (SELECT AVG(`balance`) `avg(balance)`, `gender`, SAFE_CAST(`age` AS STRING)" + + " `age`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`, `age`) `t1`\n" + + "LEFT JOIN (SELECT `age`, AVG(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY AVG(`avg(balance)`) DESC) `__row_number__`\n" + + "FROM (SELECT AVG(`balance`) `avg(balance)`, SAFE_CAST(`age` AS STRING) `age`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`, `age`) `t4`\n" + + "GROUP BY `age`) `t6` ON `t1`.`age` = `t6`.`age`\n" + + "GROUP BY `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN" + + " `t6`.`__row_number__` <= 10 THEN `t1`.`age` ELSE 'OTHER' END"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithMultipleGroupKeysAlternativeSyntax() { + String ppl = "source=bank | chart avg(balance) by gender, age"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN `t6`.`__row_number__`" + + " <= 10 THEN `t1`.`age` ELSE 'OTHER' END `age`, AVG(`t1`.`avg(balance)`)" + + " `avg(balance)`\n" + + "FROM (SELECT AVG(`balance`) `avg(balance)`, `gender`, SAFE_CAST(`age` AS STRING)" + + " `age`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`, `age`) `t1`\n" + + 
"LEFT JOIN (SELECT `age`, AVG(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY AVG(`avg(balance)`) DESC) `__row_number__`\n" + + "FROM (SELECT AVG(`balance`) `avg(balance)`, SAFE_CAST(`age` AS STRING) `age`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`, `age`) `t4`\n" + + "GROUP BY `age`) `t6` ON `t1`.`age` = `t6`.`age`\n" + + "GROUP BY `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN" + + " `t6`.`__row_number__` <= 10 THEN `t1`.`age` ELSE 'OTHER' END"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithLimit() { + String ppl = "source=bank | chart limit=2 avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithLimitZero() { + String ppl = "source=bank | chart limit=0 avg(balance) over state by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `state`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `state`, `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithSpan() { + String ppl = "source=bank | chart max(balance) by age span=10"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT MAX(`balance`) `max(balance)`, `SPAN`(`age`, 10, NULL) `age`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `SPAN`(`age`, 10, NULL)"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithTimeSpan() { + String ppl = "source=time_data | chart max(value) over timestamp span=1week by category"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t1`.`timestamp`, CASE WHEN `t1`.`category` IS NULL THEN 'NULL' WHEN" + + " `t6`.`__row_number__` <= 10 THEN `t1`.`category` ELSE 'OTHER' END `category`," + 
+ " MAX(`t1`.`max(value)`) `max(value)`\n" + + "FROM (SELECT MAX(`value`) `max(value)`, `SPAN`(`timestamp`, 1, 'w') `timestamp`," + + " `category`\n" + + "FROM `scott`.`time_data`\n" + + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t1`\n" + + "LEFT JOIN (SELECT `category`, MAX(`max(value)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY MAX(`max(value)`) DESC) `__row_number__`\n" + + "FROM (SELECT MAX(`value`) `max(value)`, `category`\n" + + "FROM `scott`.`time_data`\n" + + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t4`\n" + + "GROUP BY `category`) `t6` ON `t1`.`category` = `t6`.`category`\n" + + "GROUP BY `t1`.`timestamp`, CASE WHEN `t1`.`category` IS NULL THEN 'NULL' WHEN" + + " `t6`.`__row_number__` <= 10 THEN `t1`.`category` ELSE 'OTHER' END"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseOtherTrue() { + String ppl = "source=bank | chart useother=true avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseOtherFalse() { + String ppl = "source=bank | chart useother=false limit=2 avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithOtherStr() { + String ppl = "source=bank | chart limit=1 otherstr='other_values' avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithNullStr() { + String ppl = "source=bank | chart 
nullstr='null_values' avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseNull() { + String ppl = "source=bank | chart usenull=false avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL\n" + + "GROUP BY `gender`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + private UnresolvedPlan parsePPL(String query) { + PPLSyntaxParser parser = new PPLSyntaxParser(); + AstBuilder astBuilder = new AstBuilder(query); + return astBuilder.visit(parser.parse(query)); + } + + @RequiredArgsConstructor + public static class BankTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("balance", SqlTypeName.INTEGER) + .nullable(true) + .add("gender", SqlTypeName.VARCHAR) + .nullable(true) + .add("age", SqlTypeName.INTEGER) + .nullable(true) + .add("state", SqlTypeName.VARCHAR) + .nullable(true) + .add("timestamp", SqlTypeName.TIMESTAMP) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + 
SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } + + @RequiredArgsConstructor + public static class TimeDataTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("timestamp", SqlTypeName.TIMESTAMP) + .nullable(true) + .add("category", SqlTypeName.VARCHAR) + .nullable(true) + .add("value", SqlTypeName.INTEGER) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} From dbaa7f813be51f27035a152430ace57d7f71db68 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 29 Oct 2025 09:55:18 +0800 Subject: [PATCH 18/23] Remove irrelevant yaml test Signed-off-by: Yuanchun Shen --- .../rest-api-spec/test/issues/4582.yml | 120 ------------------ 1 file changed, 120 deletions(-) delete mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml deleted file mode 100644 index 27973484d6c..00000000000 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml +++ /dev/null @@ -1,120 +0,0 @@ 
-setup: - - do: - query.settings: - body: - transient: - plugins.calcite.enabled : true - - do: - indices.create: - index: test_timechart_4582 - body: - mappings: - properties: - "@timestamp": - type: date_nanos - severityNumber: - type: long - severityText: - type: keyword - body: - type: text - - do: - bulk: - index: test_timechart_4582 - refresh: true - body: - - '{"index": {}}' - - '{"@timestamp": "2024-01-15T10:30:04.567890123Z", "severityNumber": 9, "severityText": "INFO", "body": "Info message"}' - - '{"index": {}}' - - '{"@timestamp": "2024-01-15T10:30:05.567890123Z", "severityNumber": 13, "severityText": "WARN", "body": "Warning message"}' - - '{"index": {}}' - - '{"@timestamp": "2024-01-15T10:30:06.567890123Z", "severityNumber": 17, "severityText": "ERROR", "body": "Error message"}' - - '{"index": {}}' - - '{"@timestamp": "2024-01-15T10:30:07.567890123Z", "severityNumber": 21, "severityText": "FATAL", "body": "Fatal message"}' - - '{"index": {}}' - - '{"@timestamp": "2024-01-15T10:30:08.567890123Z", "severityNumber": 24, "severityText": "FATAL4", "body": "Fatal4 message"}' - - '{"index": {}}' - - '{"@timestamp": "2024-01-15T10:30:09.567890123Z", "severityNumber": 23, "severityText": "DEBUG", "body": "Debug message"}' - - '{"index": {}}' - - '{"@timestamp": "2024-01-15T10:30:10.567890123Z", "severityNumber": 20, "severityText": "TRACE", "body": "Trace message"}' - - '{"index": {}}' - - '{"@timestamp": "2024-01-15T10:30:11.567890123Z", "severityNumber": 22, "severityText": "CUSTOM", "body": "Custom message"}' - ---- -teardown: - - do: - query.settings: - body: - transient: - plugins.calcite.enabled : false - ---- -"timechart max aggregation with limit should not sum OTHER values": - - skip: - features: - - headers - - allowed_warnings - - do: - headers: - Content-Type: 'application/json' - ppl: - body: - query: source=test_timechart_4582 | timechart limit=1 span=10seconds max(severityNumber) by severityText - - - match: { total: 3 } - - match: { "schema": 
[{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "max(severityNumber)", "type": "bigint"}] } - - match: { "datarows": [["2024-01-15 10:30:00", "FATAL4", 24], ["2024-01-15 10:30:00", "OTHER", 23], ["2024-01-15 10:30:10", "OTHER",22]] } - ---- -"timechart min aggregation with limit should not sum OTHER values": - - skip: - features: - - headers - - allowed_warnings - - do: - headers: - Content-Type: 'application/json' - ppl: - body: - query: source=test_timechart_4582 | timechart limit=2 span=1d min(severityNumber) by severityText - - - match: { total: 3 } - - match: { "schema": [{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "min(severityNumber)", "type": "bigint"}] } - - match: { "datarows": [["2024-01-15 00:00:00", "INFO", 9], ["2024-01-15 00:00:00", "OTHER", 17], ["2024-01-15 00:00:00", "WARN", 13]] } - ---- -"timechart earliest aggregation with limit should not sum OTHER values": - - skip: - features: - - headers - - allowed_warnings - - do: - headers: - Content-Type: 'application/json' - ppl: - body: - query: source=test_timechart_4582 | timechart limit=2 span=30seconds earliest(@timestamp) by severityText - - - match: { total: 3 } - - match: { "schema": [{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "earliest(@timestamp)", "type": "timestamp"}] } - - match: { "datarows": [ - ["2024-01-15 10:30:00", "INFO", "2024-01-15 10:30:04.567890123"], - ["2024-01-15 10:30:00", "OTHER", "2024-01-15 10:30:06.567890123"], - ["2024-01-15 10:30:00", "WARN", "2024-01-15 10:30:05.567890123"]] } - ---- -"timechart count aggregation with limit should sum OTHER values": - - skip: - features: - - headers - - allowed_warnings - - do: - headers: - Content-Type: 'application/json' - ppl: - body: - query: source=test_timechart_4582 | timechart limit=3 span=1min count() by severityText - - - match: { total: 4 } - - match: { "schema": 
[{"name": "@timestamp", "type": "timestamp"}, {"name": "severityText", "type": "string"}, {"name": "count", "type": "bigint"}] } - - match: { "datarows": [["2024-01-15 10:30:00", "CUSTOM", 1], ["2024-01-15 10:30:00", "DEBUG", 1], ["2024-01-15 10:30:00", "ERROR", 1], ["2024-01-15 10:30:00", "OTHER", 5]] } From 1a6c5c5450792c0f9f6842d9bc8d159da619b516 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 29 Oct 2025 15:50:34 +0800 Subject: [PATCH 19/23] Tweak chart.rst Signed-off-by: Yuanchun Shen --- docs/user/ppl/cmd/chart.rst | 114 ++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/docs/user/ppl/cmd/chart.rst b/docs/user/ppl/cmd/chart.rst index 1633302676a..ed5058f4f38 100644 --- a/docs/user/ppl/cmd/chart.rst +++ b/docs/user/ppl/cmd/chart.rst @@ -1,6 +1,6 @@ -============= +===== chart -============= +===== .. rubric:: Table of contents @@ -10,7 +10,7 @@ chart Description -============ +=========== The ``chart`` command transforms search results by applying a statistical aggregation function and optionally grouping the data by one or two fields. The results are suitable for visualization as a two-dimension chart when grouping by two fields, where unique values in the second group key can be pivoted to column names. @@ -19,7 +19,7 @@ Version 3.4.0 Syntax -============ +====== .. code-block:: text @@ -34,16 +34,16 @@ Syntax * Default: 10 * Syntax: ``limit=(top|bottom) `` or ``limit=`` (defaults to top) - * When there are more distinct values than the limit, the additional values are grouped into an "OTHER" category if useother is not set to false. + * When there are more distinct column split values than the limit, the additional values are grouped into an "OTHER" category if ``useother`` is not set to false. * Set to 0 to show all distinct values without any limit. - * Only applies when using column split (over...by clause). + * Only applies when a column split is present (grouping by two fields, or using over...by... together). 
-* **useother**: optional. Controls whether to create an "OTHER" category for values beyond the limit. +* **useother**: optional. Controls whether to create an "OTHER" category for distinct column values beyond the limit. * Default: true - * When set to false, only the top/bottom N values (based on limit) are shown without an "OTHER" category. - * When set to true, values beyond the limit are grouped into an "OTHER" category. - * Only applies when using column split and when there are more distinct values than the limit. + * When set to false, only the top/bottom N distinct values (based on limit) are shown without an "OTHER" category. + * When set to true, distinct values beyond the limit are grouped into an "OTHER" category. + * Only applies when using column split and when there are more distinct column values than the limit. * **usenull**: optional. Controls whether to include null values as a separate category. @@ -51,45 +51,42 @@ Syntax * When set to false, events with null values in the split-by field are excluded from results. * When set to true, null values appear as a separate category. -* **nullstr**: optional. Specifies the string to display for null values. +* **nullstr**: optional. Specifies the category name for rows that do not contain the column split value. * Default: "NULL" - * Only applies when usenull is set to true. + * Only applies when ``usenull`` is set to true. -* **otherstr**: optional. Specifies the string to display for the "OTHER" category. +* **otherstr**: optional. Specifies the category name for the "OTHER" category. * Default: "OTHER" - * Only applies when useother is set to true and there are values beyond the limit. + * Only applies when ``useother`` is set to true and there are values beyond the limit. * **aggregation_function**: mandatory. The aggregation function to apply to the data. * Currently, only a single aggregation function is supported. 
- * Available functions: All aggregation functions supported by the :doc:`stats ` command. + * Available functions: aggregation functions supported by the `stats `_ command. -* **by**: optional. Groups the results by the specified field as rows. +* **by**: optional. Groups the results by either one field (row split) or two fields (row split and column split) + * ``limit``, ``useother``, and ``usenull`` apply to the column split + * Results are returned as individual rows for each combination. * If not specified, the aggregation is performed across all documents. -* **over...by**: optional. Alternative syntax for grouping by multiple fields. +* **over...by...**: optional. Alternative syntax for grouping by multiple fields. * ``over by `` groups the results by both fields. - * The row_split field becomes the primary grouping dimension. - * The column_split field becomes the secondary grouping dimension. - * Results are returned as individual rows for each combination. + * Using ``over`` alone on one field is equivalent to ``by `` Notes ===== -* The ``chart`` command transforms results into a table format suitable for visualization. -* When using multiple grouping fields (over...by syntax), the output contains individual rows for each combination of the grouping fields. -* The limit parameter determines how many columns to show when there are many distinct values. -* Results are ordered by the aggregated values to determine top/bottom selections. +* The column split field in the result will become strings so that they are compatible with ``nullstr`` and ``otherstr`` and can be used as column names once pivoted. Examples ======== Example 1: Basic aggregation without grouping -============================================== +--------------------------------------------- This example calculates the average balance across all accounts. 
@@ -104,7 +101,7 @@ PPL query:: +--------------+ Example 2: Group by single field -================================= +-------------------------------- This example calculates the count of accounts grouped by gender. @@ -120,7 +117,7 @@ PPL query:: +---------+--------+ Example 3: Using over and by for multiple field grouping -======================================================== +-------------------------------------------------------- This example shows average balance grouped by both gender and age fields. @@ -138,7 +135,7 @@ PPL query:: +--------+-----+--------------+ Example 4: Using basic limit functionality -======================================== +------------------------------------------ This example limits the results to show only the top 1 age group. @@ -155,40 +152,57 @@ PPL query:: +--------+-------+---------+ Example 5: Using limit with other parameters -============================================= +-------------------------------------------- This example shows using limit with useother and custom otherstr parameters. PPL query:: - os> source=accounts | chart limit=top 2 useother=true otherstr='remaining_accounts' max(balance) over state by gender + os> source=accounts | chart limit=top 1 useother=true otherstr='minor_gender' count() over state by gender + fetched rows / total rows = 4/4 + +-------+--------------+---------+ + | state | gender | count() | + |-------+--------------+---------| + | TN | M | 1 | + | MD | M | 1 | + | VA | minor_gender | 1 | + | IL | M | 1 | + +-------+--------------+---------+ + +Example 6: Using null parameters +-------------------------------- + +This example shows using the usenull and custom nullstr parameters. 
+ +PPL query:: + + os> source=accounts | chart usenull=true nullstr='employer not specified' count() over firstname by employer fetched rows / total rows = 4/4 - +-------+--------+--------------+ - | state | gender | max(balance) | - |-------+--------+--------------| - | TN | M | 5686 | - | MD | M | 4180 | - | IL | M | 39225 | - | VA | F | 32838 | - +-------+--------+--------------+ - -Example 6: Using span with chart command -======================================= + +-----------+------------------------+---------+ + | firstname | employer | count() | + |-----------+------------------------+---------| + | Nanette | Quility | 1 | + | Amber | Pyrami | 1 | + | Dale | employer not specified | 1 | + | Hattie | Netagy | 1 | + +-----------+------------------------+---------+ + +Example 7: Using chart command with span +---------------------------------------- This example demonstrates using span for grouping age ranges. PPL query:: - os> source=accounts | chart max(balance) by age span=10 + os> source=accounts | chart max(balance) by age span=10, gender fetched rows / total rows = 2/2 - +--------------+-----+ - | max(balance) | age | - |--------------+-----| - | 32838 | 20 | - | 39225 | 30 | - +--------------+-----+ + +-----+--------+--------------+ + | age | gender | max(balance) | + |-----+--------+--------------| + | 30 | M | 39225 | + | 20 | F | 32838 | + +-----+--------+--------------+ Limitations -============ +=========== * Only a single aggregation function is supported per chart command. -* When using both row and column splits, the column split field is converted to string type so that it can be used as column names. 
\ No newline at end of file From 93e3d0314ce8d0a1719203bb220cb3cc609b57b2 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 29 Oct 2025 16:52:59 +0800 Subject: [PATCH 20/23] Swap the order of chart output to ensure metrics come last Signed-off-by: Yuanchun Shen --- .../sql/calcite/CalciteRelNodeVisitor.java | 72 ++++++++++------ docs/user/ppl/cmd/chart.rst | 12 +-- .../calcite/remote/CalciteChartCommandIT.java | 82 +++++++++---------- 3 files changed, 93 insertions(+), 73 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 4e31965d8ba..63af492b6f7 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1108,6 +1108,19 @@ private Pair, List> resolveAttributesForAggregation( @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { + visitAggregation(node, context, true); + return context.relBuilder.peek(); + } + + /** + * Visits an aggregation node and builds the corresponding Calcite RelNode. + * + * @param node the aggregation node containing group expressions and aggregation functions + * @param context the Calcite plan context for building RelNodes + * @param aggFirst if true, aggregation results (metrics) appear first in output schema (agg, + * group-by fields); if false, group expressions appear first (group-by fields, agg). 
+ */ + private void visitAggregation(Aggregation node, CalcitePlanContext context, boolean aggFirst) { visitChildren(node, context); List aggExprList = node.getAggExprList(); @@ -1152,8 +1165,6 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { aggregateWithTrimming(groupExprList, aggExprList, context, toAddHintsOnAggregate); // schema reordering - // As an example, in command `stats count() by colA, colB`, - // the sequence of output schema is "count, colA, colB". List outputFields = context.relBuilder.fields(); int numOfOutputFields = outputFields.size(); int numOfAggList = aggExprList.size(); @@ -1161,8 +1172,6 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { // Add aggregation results first List aggRexList = outputFields.subList(numOfOutputFields - numOfAggList, numOfOutputFields); - reordered.addAll(aggRexList); - // Add group by columns List aliasedGroupByList = aggregationAttributes.getLeft().stream() .map(this::extractAliasLiteral) @@ -1171,10 +1180,17 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { .map(context.relBuilder::field) .map(f -> (RexNode) f) .toList(); - reordered.addAll(aliasedGroupByList); + if (aggFirst) { + // As an example, in command `stats count() by colA, colB`, + // the sequence of output schema is "count, colA, colB". 
+ reordered.addAll(aggRexList); + // Add group by columns + reordered.addAll(aliasedGroupByList); + } else { + reordered.addAll(aliasedGroupByList); + reordered.addAll(aggRexList); + } context.relBuilder.project(reordered); - - return context.relBuilder.peek(); } private Optional getTimeSpanField(UnresolvedExpression expr) { @@ -2038,7 +2054,13 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { groupExprList, null, List.of(new Argument(Argument.BUCKET_NULLABLE, AstDSL.booleanLiteral(config.useNull)))); - RelNode aggregated = visitAggregation(aggregation, context); + visitAggregation(aggregation, context, false); + RelBuilder relBuilder = context.relBuilder; + String columnSplitName = + relBuilder.peek().getRowType().getFieldNames().size() > 2 + ? relBuilder.peek().getRowType().getFieldNames().get(1) + : null; + RelNode aggregated = context.relBuilder.peek(); // If row or column split does not present or limit equals 0, this is the same as `stats agg // [group by col]` because all truncating is performed on the column split @@ -2058,9 +2080,8 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // Convert the column split to string if necessary: column split was supposed to be pivoted to // column names. 
This guarantees that its type compatibility with useother and usenull - RelBuilder relBuilder = context.relBuilder; - RexNode colSplit = relBuilder.field(2); - String columSplitName = relBuilder.peek().getRowType().getFieldNames().getLast(); + RexNode colSplit = relBuilder.field(1); + String columSplitName = relBuilder.peek().getRowType().getFieldNames().get(1); if (!SqlTypeUtil.isCharacter(colSplit.getType())) { colSplit = relBuilder.alias( @@ -2068,15 +2089,14 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { UserDefinedFunctionUtils.NULLABLE_STRING, colSplit, true, true), columSplitName); } - relBuilder.project(relBuilder.field(0), relBuilder.field(1), colSplit); + relBuilder.project(relBuilder.field(0), colSplit, relBuilder.field(2)); aggregated = relBuilder.peek(); - // 0: agg; 2: column-split - relBuilder.project(relBuilder.field(0), relBuilder.field(2)); - // 1: column split; 0: agg + // 1: column-split, 2: agg + relBuilder.project(relBuilder.field(1), relBuilder.field(2)); relBuilder.aggregate( - relBuilder.groupKey(relBuilder.field(1)), - buildAggCall(context.relBuilder, aggFunction, relBuilder.field(0)) + relBuilder.groupKey(relBuilder.field(0)), + buildAggCall(context.relBuilder, aggFunction, relBuilder.field(1)) .as("__grand_total__")); // results: group key, agg calls RexNode grandTotal = relBuilder.field("__grand_total__"); // Apply sorting: for MIN/EARLIEST, reverse the top/bottom logic @@ -2105,9 +2125,9 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // on column-split = group key relBuilder.join( - JoinRelType.LEFT, relBuilder.equals(relBuilder.field(2, 0, 2), relBuilder.field(2, 1, 0))); + JoinRelType.LEFT, relBuilder.equals(relBuilder.field(2, 0, 1), relBuilder.field(2, 1, 0))); - RexNode colSplitPostJoin = relBuilder.field(2); + RexNode colSplitPostJoin = relBuilder.field(1); RexNode lteCondition = relBuilder.call( SqlStdOperatorTable.LESS_THAN_OR_EQUAL, @@ -2126,25 +2146,25 @@ public RelNode 
visitChart(Chart node, CalcitePlanContext context) { nullCondition, relBuilder.literal(config.nullStr), lteCondition, - relBuilder.field(2), + relBuilder.field(1), // col split relBuilder.literal(config.otherStr)); } else { columnSplitExpr = relBuilder.call( SqlStdOperatorTable.CASE, lteCondition, - relBuilder.field(2), + relBuilder.field(1), relBuilder.literal(config.otherStr)); } - String aggFieldName = relBuilder.peek().getRowType().getFieldNames().getFirst(); + String aggFieldName = relBuilder.peek().getRowType().getFieldNames().get(2); relBuilder.project( relBuilder.field(0), - relBuilder.field(1), - relBuilder.alias(columnSplitExpr, columSplitName)); + relBuilder.alias(columnSplitExpr, columnSplitName), + relBuilder.field(2)); relBuilder.aggregate( - relBuilder.groupKey(relBuilder.field(1), relBuilder.field(2)), - buildAggCall(context.relBuilder, aggFunction, relBuilder.field(0)).as(aggFieldName)); + relBuilder.groupKey(relBuilder.field(0), relBuilder.field(1)), + buildAggCall(context.relBuilder, aggFunction, relBuilder.field(2)).as(aggFieldName)); return relBuilder.peek(); } diff --git a/docs/user/ppl/cmd/chart.rst b/docs/user/ppl/cmd/chart.rst index ed5058f4f38..ed1cd2a453e 100644 --- a/docs/user/ppl/cmd/chart.rst +++ b/docs/user/ppl/cmd/chart.rst @@ -109,12 +109,12 @@ PPL query:: os> source=accounts | chart count() by gender fetched rows / total rows = 2/2 - +---------+--------+ - | count() | gender | - |---------+--------| - | 1 | F | - | 3 | M | - +---------+--------+ + +--------+---------+ + | gender | count() | + |--------+---------| + | F | 1 | + | M | 3 | + +--------+---------+ Example 3: Using over and by for multiple field grouping -------------------------------------------------------- diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java index ce6a63e3c24..310e8451b22 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java @@ -35,8 +35,8 @@ public void init() throws Exception { public void testChartWithSingleGroupKey() throws IOException { JSONObject result1 = executeQuery(String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK)); - verifySchema(result1, schema("avg(balance)", "double"), schema("gender", "string")); - verifyDataRows(result1, rows(40488, "F"), rows(16377.25, "M")); + verifySchema(result1, schema("gender", "string"), schema("avg(balance)", "double")); + verifyDataRows(result1, rows("F", 40488), rows("M", 16377.25)); JSONObject result2 = executeQuery(String.format("source=%s | chart avg(balance) over gender", TEST_INDEX_BANK)); assertJsonEquals(result1.toString(), result2.toString()); @@ -74,18 +74,18 @@ public void testChartCombineOverByWithLimit0() throws IOException { "source=%s | chart limit=0 avg(balance) over state by gender", TEST_INDEX_BANK)); verifySchema( result, - schema("avg(balance)", "double"), schema("state", "string"), - schema("gender", "string")); + schema("gender", "string"), + schema("avg(balance)", "double")); verifyDataRows( result, - rows(39225.0, "IL", "M"), - rows(48086.0, "IN", "F"), - rows(4180.0, "MD", "M"), - rows(40540.0, "PA", "F"), - rows(5686.0, "TN", "M"), - rows(32838.0, "VA", "F"), - rows(16418.0, "WA", "M")); + rows("IL", "M", 39225.0), + rows("IN", "F", 48086.0), + rows("MD", "M", 4180.0), + rows("PA", "F", 40540.0), + rows("TN", "M", 5686.0), + rows("VA", "F", 32838.0), + rows("WA", "M", 16418.0)); } @Test @@ -93,8 +93,8 @@ public void testChartMaxBalanceByAgeSpan() throws IOException { JSONObject result = executeQuery( String.format("source=%s | chart max(balance) by age span=10", TEST_INDEX_BANK)); - verifySchema(result, schema("max(balance)", "bigint"), schema("age", "int")); - verifyDataRows(result, rows(32838, 20), rows(48086, 30)); 
+ verifySchema(result, schema("age", "int"), schema("max(balance)", "bigint")); + verifyDataRows(result, rows(20, 32838), rows(30, 48086)); } @Test @@ -172,37 +172,37 @@ public void testChartLimit0WithUseOther() throws IOException { TEST_INDEX_OTEL_LOGS)); verifySchema( result, - schema("max(severityNumber)", "bigint"), schema("flags", "bigint"), - schema("severityText", "string")); + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); verifyDataRows( result, - rows(5, 0, "DEBUG"), - rows(6, 0, "DEBUG2"), - rows(7, 0, "DEBUG3"), - rows(8, 0, "DEBUG4"), - rows(17, 0, "ERROR"), - rows(18, 0, "ERROR2"), - rows(19, 0, "ERROR3"), - rows(20, 0, "ERROR4"), - rows(21, 0, "FATAL"), - rows(22, 0, "FATAL2"), - rows(23, 0, "FATAL3"), - rows(24, 0, "FATAL4"), - rows(9, 0, "INFO"), - rows(10, 0, "INFO2"), - rows(11, 0, "INFO3"), - rows(12, 0, "INFO4"), - rows(2, 0, "TRACE2"), - rows(3, 0, "TRACE3"), - rows(4, 0, "TRACE4"), - rows(13, 0, "WARN"), - rows(14, 0, "WARN2"), - rows(15, 0, "WARN3"), - rows(16, 0, "WARN4"), - rows(17, 1, "ERROR"), - rows(9, 1, "INFO"), - rows(1, 1, "TRACE")); + rows(0, "DEBUG", 5), + rows(0, "DEBUG2", 6), + rows(0, "DEBUG3", 7), + rows(0, "DEBUG4", 8), + rows(0, "ERROR", 17), + rows(0, "ERROR2", 18), + rows(0, "ERROR3", 19), + rows(0, "ERROR4", 20), + rows(0, "FATAL", 21), + rows(0, "FATAL2", 22), + rows(0, "FATAL3", 23), + rows(0, "FATAL4", 24), + rows(0, "INFO", 9), + rows(0, "INFO2", 10), + rows(0, "INFO3", 11), + rows(0, "INFO4", 12), + rows(0, "TRACE2", 2), + rows(0, "TRACE3", 3), + rows(0, "TRACE4", 4), + rows(0, "WARN", 13), + rows(0, "WARN2", 14), + rows(0, "WARN3", 15), + rows(0, "WARN4", 16), + rows(1, "ERROR", 17), + rows(1, "INFO", 9), + rows(1, "TRACE", 1)); } @Test From 86b4cb3170137c05a2071ce0c94a7a0d658af7f0 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 29 Oct 2025 19:38:37 +0800 Subject: [PATCH 21/23] Filter rows without col split when calculate grand total Signed-off-by: Yuanchun Shen --- 
.../sql/calcite/CalciteRelNodeVisitor.java | 12 +- .../sql/calcite/CalciteNoPushdownIT.java | 165 +++++++++--------- .../calcite/remote/CalciteChartCommandIT.java | 20 +++ .../explain_chart_multiple_group_keys.yaml | 35 ++-- .../calcite/explain_chart_null_str.yaml | 33 ++-- .../explain_chart_single_group_key.yaml | 9 +- .../calcite/explain_chart_timestamp_span.yaml | 29 +-- .../calcite/explain_chart_use_other.yaml | 34 ++-- .../calcite/explain_chart_with_limit.yaml | 9 +- .../calcite/explain_chart_with_span.yaml | 4 +- .../explain_chart_multiple_group_keys.yaml | 34 ++-- .../explain_chart_null_str.yaml | 34 ++-- .../explain_chart_single_group_key.yaml | 10 +- .../explain_chart_timestamp_span.yaml | 33 ++-- .../explain_chart_use_other.yaml | 40 +++-- .../explain_chart_with_limit.yaml | 10 +- .../explain_chart_with_span.yaml | 10 +- .../sql/ppl/calcite/CalcitePPLChartTest.java | 59 ++++--- 18 files changed, 303 insertions(+), 277 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 63af492b6f7..eb73fbceea2 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2094,11 +2094,14 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // 1: column-split, 2: agg relBuilder.project(relBuilder.field(1), relBuilder.field(2)); + // Ensure rows without a column split do not interfere with the grand total calculation + relBuilder.filter(relBuilder.isNotNull(relBuilder.field(0))); + final String GRAND_TOTAL_COL = "__grand_total__"; relBuilder.aggregate( relBuilder.groupKey(relBuilder.field(0)), buildAggCall(context.relBuilder, aggFunction, relBuilder.field(1)) - .as("__grand_total__")); // results: group key, agg calls - RexNode grandTotal = relBuilder.field("__grand_total__"); + .as(GRAND_TOTAL_COL)); // results: group 
key, agg calls + RexNode grandTotal = relBuilder.field(GRAND_TOTAL_COL); // Apply sorting: for MIN/EARLIEST, reverse the top/bottom logic boolean smallestFirst = aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST; @@ -2108,6 +2111,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { // Always set it to null last so that it does not interfere with top / bottom calculation grandTotal = relBuilder.nullsLast(grandTotal); + final String ROW_NUM_COL = "__row_number__"; RexNode rowNum = PlanUtils.makeOver( context, @@ -2117,7 +2121,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { List.of(), List.of(grandTotal), WindowFrame.toCurrentRow()); - relBuilder.projectPlus(relBuilder.alias(rowNum, "__row_number__")); + relBuilder.projectPlus(relBuilder.alias(rowNum, ROW_NUM_COL)); RelNode ranked = relBuilder.build(); relBuilder.push(aggregated); @@ -2131,7 +2135,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { RexNode lteCondition = relBuilder.call( SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - relBuilder.field("__row_number__"), + relBuilder.field(ROW_NUM_COL), relBuilder.literal(limit)); RexNode nullCondition = relBuilder.isNull(colSplitPostJoin); RexNode columnSplitExpr; diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 14e2bfdb4da..69507c71aa5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -10,6 +10,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Suite; import org.opensearch.sql.calcite.remote.*; +import org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT; import org.opensearch.sql.ppl.PPLIntegTestCase; /** @@ -20,88 +21,88 @@ @RunWith(Suite.class) @Suite.SuiteClasses({ CalciteExplainIT.class, - // 
CalciteArrayFunctionIT.class, - // CalciteBinCommandIT.class, - // CalciteConvertTZFunctionIT.class, - // CalciteCsvFormatIT.class, - // CalciteDataTypeIT.class, - // CalciteDateTimeComparisonIT.class, - // CalciteDateTimeFunctionIT.class, - // CalciteDateTimeImplementationIT.class, - // CalciteDedupCommandIT.class, - // CalciteDescribeCommandIT.class, - // CalciteExpandCommandIT.class, - // CalciteFieldsCommandIT.class, - // CalciteFillNullCommandIT.class, - // CalciteFlattenCommandIT.class, - // CalciteFlattenDocValueIT.class, - // CalciteGeoIpFunctionsIT.class, - // CalciteGeoPointFormatsIT.class, - // CalciteHeadCommandIT.class, - // CalciteInformationSchemaCommandIT.class, - // CalciteIPComparisonIT.class, - // CalciteIPFunctionsIT.class, - // CalciteJsonFunctionsIT.class, - // CalciteLegacyAPICompatibilityIT.class, - // CalciteLikeQueryIT.class, - // CalciteMathematicalFunctionIT.class, - // CalciteMultisearchCommandIT.class, - // CalciteMultiValueStatsIT.class, - // CalciteNewAddedCommandsIT.class, - // CalciteNowLikeFunctionIT.class, - // CalciteObjectFieldOperateIT.class, - // CalciteOperatorIT.class, - // CalciteParseCommandIT.class, - // CalcitePPLAggregationIT.class, - // CalcitePPLAppendcolIT.class, - // CalcitePPLAppendCommandIT.class, - // CalcitePPLBasicIT.class, - // CalcitePPLBuiltinDatetimeFunctionInvalidIT.class, - // CalcitePPLBuiltinFunctionIT.class, - // CalcitePPLBuiltinFunctionsNullIT.class, - // CalcitePPLCaseFunctionIT.class, - // CalcitePPLCastFunctionIT.class, - // CalcitePPLConditionBuiltinFunctionIT.class, - // CalcitePPLCryptographicFunctionIT.class, - // CalcitePPLDedupIT.class, - // CalcitePPLEventstatsIT.class, - // CalcitePPLExistsSubqueryIT.class, - // CalcitePPLExplainIT.class, - // CalcitePPLFillnullIT.class, - // CalcitePPLGrokIT.class, - // CalcitePPLInSubqueryIT.class, - // CalcitePPLIPFunctionIT.class, - // CalcitePPLJoinIT.class, - // CalcitePPLJsonBuiltinFunctionIT.class, - // CalcitePPLLookupIT.class, - // 
CalcitePPLParseIT.class, - // CalcitePPLPatternsIT.class, - // CalcitePPLPluginIT.class, - // CalcitePPLRenameIT.class, - // CalcitePPLScalarSubqueryIT.class, - // CalcitePPLSortIT.class, - // CalcitePPLStringBuiltinFunctionIT.class, - // CalcitePPLTrendlineIT.class, - // CalcitePrometheusDataSourceCommandsIT.class, - // CalciteQueryAnalysisIT.class, - // CalciteRareCommandIT.class, - // CalciteRegexCommandIT.class, - // CalciteRexCommandIT.class, - // CalciteRenameCommandIT.class, - // CalciteReplaceCommandIT.class, - // CalciteResourceMonitorIT.class, - // CalciteSearchCommandIT.class, - // CalciteSettingsIT.class, - // CalciteShowDataSourcesCommandIT.class, - // CalciteSortCommandIT.class, - // CalciteStatsCommandIT.class, - // CalciteSystemFunctionIT.class, - // CalciteTextFunctionIT.class, - // CalciteTopCommandIT.class, - // CalciteTrendlineCommandIT.class, - // CalciteVisualizationFormatIT.class, - // CalciteWhereCommandIT.class, - // CalcitePPLTpchIT.class + CalciteArrayFunctionIT.class, + CalciteBinCommandIT.class, + CalciteConvertTZFunctionIT.class, + CalciteCsvFormatIT.class, + CalciteDataTypeIT.class, + CalciteDateTimeComparisonIT.class, + CalciteDateTimeFunctionIT.class, + CalciteDateTimeImplementationIT.class, + CalciteDedupCommandIT.class, + CalciteDescribeCommandIT.class, + CalciteExpandCommandIT.class, + CalciteFieldsCommandIT.class, + CalciteFillNullCommandIT.class, + CalciteFlattenCommandIT.class, + CalciteFlattenDocValueIT.class, + CalciteGeoIpFunctionsIT.class, + CalciteGeoPointFormatsIT.class, + CalciteHeadCommandIT.class, + CalciteInformationSchemaCommandIT.class, + CalciteIPComparisonIT.class, + CalciteIPFunctionsIT.class, + CalciteJsonFunctionsIT.class, + CalciteLegacyAPICompatibilityIT.class, + CalciteLikeQueryIT.class, + CalciteMathematicalFunctionIT.class, + CalciteMultisearchCommandIT.class, + CalciteMultiValueStatsIT.class, + CalciteNewAddedCommandsIT.class, + CalciteNowLikeFunctionIT.class, + CalciteObjectFieldOperateIT.class, + 
CalciteOperatorIT.class, + CalciteParseCommandIT.class, + CalcitePPLAggregationIT.class, + CalcitePPLAppendcolIT.class, + CalcitePPLAppendCommandIT.class, + CalcitePPLBasicIT.class, + CalcitePPLBuiltinDatetimeFunctionInvalidIT.class, + CalcitePPLBuiltinFunctionIT.class, + CalcitePPLBuiltinFunctionsNullIT.class, + CalcitePPLCaseFunctionIT.class, + CalcitePPLCastFunctionIT.class, + CalcitePPLConditionBuiltinFunctionIT.class, + CalcitePPLCryptographicFunctionIT.class, + CalcitePPLDedupIT.class, + CalcitePPLEventstatsIT.class, + CalcitePPLExistsSubqueryIT.class, + CalcitePPLExplainIT.class, + CalcitePPLFillnullIT.class, + CalcitePPLGrokIT.class, + CalcitePPLInSubqueryIT.class, + CalcitePPLIPFunctionIT.class, + CalcitePPLJoinIT.class, + CalcitePPLJsonBuiltinFunctionIT.class, + CalcitePPLLookupIT.class, + CalcitePPLParseIT.class, + CalcitePPLPatternsIT.class, + CalcitePPLPluginIT.class, + CalcitePPLRenameIT.class, + CalcitePPLScalarSubqueryIT.class, + CalcitePPLSortIT.class, + CalcitePPLStringBuiltinFunctionIT.class, + CalcitePPLTrendlineIT.class, + CalcitePrometheusDataSourceCommandsIT.class, + CalciteQueryAnalysisIT.class, + CalciteRareCommandIT.class, + CalciteRegexCommandIT.class, + CalciteRexCommandIT.class, + CalciteRenameCommandIT.class, + CalciteReplaceCommandIT.class, + CalciteResourceMonitorIT.class, + CalciteSearchCommandIT.class, + CalciteSettingsIT.class, + CalciteShowDataSourcesCommandIT.class, + CalciteSortCommandIT.class, + CalciteStatsCommandIT.class, + CalciteSystemFunctionIT.class, + CalciteTextFunctionIT.class, + CalciteTopCommandIT.class, + CalciteTrendlineCommandIT.class, + CalciteVisualizationFormatIT.class, + CalciteWhereCommandIT.class, + CalcitePPLTpchIT.class }) public class CalciteNoPushdownIT { private static boolean wasPushdownEnabled; diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java index 
310e8451b22..c25296d9cd8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java @@ -29,6 +29,7 @@ public void init() throws Exception { loadIndex(Index.BANK_WITH_NULL_VALUES); loadIndex(Index.OTELLOGS); loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.EVENTS_NULL); } @Test @@ -282,6 +283,25 @@ public void testChartUseNullTrueWithNullStr() throws IOException { rows("F", "nil", null)); } + @Test + public void testChartWithNullAndLimit() throws IOException { + JSONObject result = + executeQuery("source=events_null | chart limit=3 count() over @timestamp span=1d by host"); + + verifySchema( + result, + schema("@timestamp", "timestamp"), + schema("host", "string"), + schema("count()", "bigint")); + + verifyDataRows( + result, + rows("2024-07-01 00:00:00", "db-01", 1), + rows("2024-07-01 00:00:00", "web-01", 2), + rows("2024-07-01 00:00:00", "web-02", 2), + rows("2024-07-01 00:00:00", "NULL", 1)); + } + @Test public void testChartUseNullFalseWithNullStr() throws IOException { JSONObject result = diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml index b4419f38e11..b41cebbda4e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_multiple_group_keys.yaml @@ -1,32 +1,33 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) - LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) - LogicalJoin(condition=[=($2, $3)], joinType=[left]) - LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], 
avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) LogicalProject(gender=[$4], age=[$10], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) - LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) - LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) - LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) - LogicalProject(gender=[$4], age=[$10], balance=[$7]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) - EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) - EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) - EnumerableSort(sort0=[$2], dir0=[ASC]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[SAFE_CAST($t1)], avg(balance)=[$t2], gender=[$t0], age=[$t3]) + EnumerableAggregate(group=[{0, 1}], 
agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[SAFE_CAST($t1)], gender=[$t0], age=[$t3], avg(balance)=[$t2]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$0], dir0=[ASC]) EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) - EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[SAFE_CAST($t1)], avg(balance)=[$t0], $f1=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[SAFE_CAST($t0)], expr#3=[IS NOT NULL($t2)], $f0=[$t2], avg(balance)=[$t1], $condition=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[age, avg(balance)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml index 6a3a024b2b3..3f60744f069 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_null_str.yaml @@ -1,27 +1,28 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) - LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS 
NULL($2), 'nil', <=($5, 10), $2, 'OTHER')]) - LogicalJoin(condition=[=($2, $3)], joinType=[left]) - LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'nil', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) - LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) - LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) - LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) - LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) - EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, 
$t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) - EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) - EnumerableSort(sort0=[$2], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[gender, balance, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","balance","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) @@ -29,8 +30,8 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) - 
EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], expr#11=[IS NOT NULL($t4)], age=[$t4], avg(balance)=[$t10], $condition=[$t11]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[gender, balance, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","balance","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml index 3752736138f..be22ae9c011 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_single_group_key.yaml @@ -1,9 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(avg(balance)=[$1], gender=[$0]) - LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) - LogicalProject(gender=[$4], balance=[$7]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalAggregate(group=[{0}], 
avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), PROJECT->[avg(balance), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml index 9007f4da716..7fb6a77dce2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_timestamp_span.yaml @@ -1,26 +1,27 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{1, 2}], max(value)=[MAX($0)]) - 
LogicalProject(max(value)=[$0], timestamp=[$1], category=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) - LogicalJoin(condition=[=($2, $3)], joinType=[left]) - LogicalProject(max(value)=[$2], timestamp=[$1], category=[$0]) + LogicalAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + LogicalProject(timestamp=[$0], category=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], max(value)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(timestamp=[$1], category=[$0], max(value)=[$2]) LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) LogicalProject(category=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) - LogicalAggregate(group=[{1}], __grand_total__=[MAX($0)]) - LogicalProject(max(value)=[$2], category=[$0]) - LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) - LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + LogicalAggregate(group=[{0}], __grand_total__=[MAX($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(category=[$0], max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableAggregate(group=[{1, 2}], max(value)=[MAX($0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], category=[$t10]) - EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) - CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[max(value), timestamp0, category], SORT->[2]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], timestamp=[$t0], category=[$t10], max(value)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[timestamp0, category, max(value)], SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$0], dir0=[ASC]) EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and 
CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},__grand_total__=MAX($1))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"__grand_total__":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($1), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},__grand_total__=MAX($1))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"category","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"__grand_total__":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml index d6e799c3c36..ffb167e148e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_use_other.yaml @@ -1,26 +1,26 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{1, 2}], max(severityNumber)=[MAX($0)]) - LogicalProject(max(severityNumber)=[$0], flags=[$1], severityText=[CASE(IS NULL($2), 'NULL', <=($5, 2), $2, 'max_among_other')]) - LogicalJoin(condition=[=($2, $3)], 
joinType=[left]) - LogicalProject(max(severityNumber)=[$2], flags=[$0], severityText=[$1]) - LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) - LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$0], severityText=[CASE(IS NULL($1), 'NULL', <=($5, 2), $1, 'max_among_other')], max(severityNumber)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) LogicalProject(severityText=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) - LogicalAggregate(group=[{1}], __grand_total__=[MAX($0)]) - LogicalProject(max(severityNumber)=[$2], severityText=[$1]) - LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) - LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalAggregate(group=[{0}], __grand_total__=[MAX($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(severityText=[$1], max(severityNumber)=[$2]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableAggregate(group=[{1, 2}], max(severityNumber)=[MAX($0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], severityText=[$t10]) - 
EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},max(severityNumber)=MAX($2)), PROJECT->[max(severityNumber), flags, severityText], SORT->[2]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"flags":{"terms":{"field":"flags","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"max(severityNumber)":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], flags=[$t0], severityText=[$t10], max(severityNumber)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},max(severityNumber)=MAX($2)), SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"flags":{"terms":{"field":"flags","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"max(severityNumber)":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$0], dir0=[ASC]) EnumerableCalc(expr#0..2=[{inputs}], 
severityText=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},__grand_total__=MAX($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"__grand_total__":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[FILTER->IS NOT NULL($7), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},__grand_total__=MAX($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"severityText","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"__grand_total__":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml index 3077f16152c..4c97f6b8506 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_limit.yaml @@ -1,9 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(avg(balance)=[$2], state=[$0], gender=[$1]) - 
LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) - LogicalProject(state=[$9], gender=[$4], balance=[$7]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), state, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml index b6af45e0974..5b6b68572c2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_chart_with_span.yaml @@ -1,9 +1,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(max(balance)=[$1], age=[$0]) + LogicalProject(age=[$0], max(balance)=[$1]) LogicalAggregate(group=[{1}], max(balance)=[MAX($0)]) LogicalProject(balance=[$7], age0=[SPAN($10, 10, null:NULL)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},max(balance)=MAX($0)), PROJECT->[max(balance), age0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age0":{"histogram":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc","interval":10.0}}}]},"aggregations":{"max(balance)":{"max":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},max(balance)=MAX($0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age0":{"histogram":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc","interval":10.0}}}]},"aggregations":{"max(balance)":{"max":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml index 11e45e502bf..5cf91c3d322 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_multiple_group_keys.yaml @@ -1,35 +1,35 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) - LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) - LogicalJoin(condition=[=($2, $3)], joinType=[left]) - LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) LogicalProject(gender=[$4], age=[$10], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) - LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) - LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) - LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) - LogicalProject(gender=[$4], age=[$10], balance=[$7]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + 
LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) - EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) - EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) - EnumerableSort(sort0=[$2], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) EnumerableSort(sort0=[$0], dir0=[ASC]) EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT 
ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) - EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], expr#11=[IS NOT NULL($t4)], age=[$t4], avg(balance)=[$t10], $condition=[$t11]) EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) - diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml index 0c34016836b..22daa2c447c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_null_str.yaml @@ -1,27 +1,28 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) - LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'nil', <=($5, 10), $2, 'OTHER')]) - LogicalJoin(condition=[=($2, $3)], joinType=[left]) - LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'nil', <=($5, 10), $1, 'OTHER')], 
avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) - LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) - LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) - LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) - LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) - EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) - EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) - EnumerableSort(sort0=[$2], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], 
expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) EnumerableCalc(expr#0..12=[{inputs}], expr#13=[10], expr#14=[null:NULL], expr#15=[SPAN($t5, $t13, $t14)], gender=[$t4], balance=[$t3], age0=[$t15]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) @@ -29,9 +30,8 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) - EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], 
expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], expr#11=[IS NOT NULL($t4)], age=[$t4], avg(balance)=[$t10], $condition=[$t11]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) EnumerableCalc(expr#0..12=[{inputs}], expr#13=[10], expr#14=[null:NULL], expr#15=[SPAN($t5, $t13, $t14)], gender=[$t4], balance=[$t3], age0=[$t15]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) - diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml index b9e6ff9f735..5af244df172 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_single_group_key.yaml @@ -1,13 +1,11 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(avg(balance)=[$1], gender=[$0]) - LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) - LogicalProject(gender=[$4], balance=[$7]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(balance)=[$t8], gender=[$t0]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], gender=[$t0], avg(balance)=[$t8]) EnumerableAggregate(group=[{4}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) - diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml index 913fd20b8bc..bdbe39dc2ae 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_timestamp_span.yaml @@ -1,26 +1,27 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{1, 2}], max(value)=[MAX($0)]) - LogicalProject(max(value)=[$0], timestamp=[$1], category=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) - LogicalJoin(condition=[=($2, $3)], joinType=[left]) - LogicalProject(max(value)=[$2], timestamp=[$1], category=[$0]) + LogicalAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + LogicalProject(timestamp=[$0], category=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], max(value)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(timestamp=[$1], category=[$0], max(value)=[$2]) LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) LogicalProject(category=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) - LogicalAggregate(group=[{1}], __grand_total__=[MAX($0)]) - LogicalProject(max(value)=[$2], category=[$0]) - LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) - LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + LogicalAggregate(group=[{0}], __grand_total__=[MAX($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + 
LogicalProject(category=[$0], max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableAggregate(group=[{1, 2}], max(value)=[MAX($0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], category=[$t10]) - EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) - EnumerableSort(sort0=[$2], dir0=[ASC]) - EnumerableCalc(expr#0..2=[{inputs}], max(value)=[$t2], timestamp=[$t1], category=[$t0]) + EnumerableAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], timestamp=[$t0], category=[$t10], max(value)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], timestamp=[$t1], category=[$t0], max(value)=[$t2]) EnumerableAggregate(group=[{0, 2}], max(value)=[MAX($1)]) EnumerableCalc(expr#0..9=[{inputs}], expr#10=[1], expr#11=['w'], expr#12=[SPAN($t3, $t10, $t11)], category=[$t1], value=[$t2], timestamp0=[$t12]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) @@ -28,5 +29,5 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableAggregate(group=[{1}], __grand_total__=[MAX($2)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) - + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[IS NOT NULL($t1)], 
proj#0..9=[{exprs}], $condition=[$t10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml index 022072f3ae7..be986e25077 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_use_other.yaml @@ -1,30 +1,32 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{1, 2}], max(severityNumber)=[MAX($0)]) - LogicalProject(max(severityNumber)=[$0], flags=[$1], severityText=[CASE(IS NULL($2), 'NULL', <=($5, 2), $2, 'max_among_other')]) - LogicalJoin(condition=[=($2, $3)], joinType=[left]) - LogicalProject(max(severityNumber)=[$2], flags=[$0], severityText=[$1]) - LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) - LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$0], severityText=[CASE(IS NULL($1), 'NULL', <=($5, 2), $1, 'max_among_other')], max(severityNumber)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) LogicalProject(severityText=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) - LogicalAggregate(group=[{1}], __grand_total__=[MAX($0)]) - LogicalProject(max(severityNumber)=[$2], severityText=[$1]) - LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) - 
LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalAggregate(group=[{0}], __grand_total__=[MAX($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(severityText=[$1], max(severityNumber)=[$2]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableAggregate(group=[{1, 2}], max(severityNumber)=[MAX($0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], severityText=[$t10]) - EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) - EnumerableSort(sort0=[$2], dir0=[ASC]) - EnumerableCalc(expr#0..2=[{inputs}], max(severityNumber)=[$t2], flags=[$t1], severityText=[$t0]) + EnumerableAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], flags=[$t0], severityText=[$t10], max(severityNumber)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], flags=[$t1], severityText=[$t0], max(severityNumber)=[$t2]) EnumerableAggregate(group=[{7, 23}], max(severityNumber)=[MAX($163)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) EnumerableSort(sort0=[$0], dir0=[ASC]) EnumerableCalc(expr#0..2=[{inputs}], severityText=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - 
EnumerableAggregate(group=[{7}], __grand_total__=[MAX($163)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + EnumerableAggregate(group=[{0}], __grand_total__=[MAX($2)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t0)], proj#0..2=[{exprs}], $condition=[$t3]) + EnumerableAggregate(group=[{7, 23}], max(severityNumber)=[MAX($163)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml index e4ff0a172ce..0c23fbaf627 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_limit.yaml @@ -1,13 +1,11 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(avg(balance)=[$2], state=[$0], gender=[$1]) - LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) - LogicalProject(state=[$9], gender=[$4], balance=[$7]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg(balance)=[$t9], state=[$t1], gender=[$t0]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], state=[$t1], gender=[$t0], avg(balance)=[$t9]) EnumerableAggregate(group=[{4, 9}], 
agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) - diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml index 6e8f8777170..8708b777a13 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_chart_with_span.yaml @@ -1,14 +1,12 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(max(balance)=[$1], age=[$0]) + LogicalProject(age=[$0], max(balance)=[$1]) LogicalAggregate(group=[{1}], max(balance)=[MAX($0)]) LogicalProject(balance=[$7], age0=[SPAN($10, 10, null:NULL)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], max(balance)=[$t1], age=[$t0]) - EnumerableAggregate(group=[{1}], max(balance)=[MAX($0)]) - EnumerableCalc(expr#0..18=[{inputs}], expr#19=[10], expr#20=[null:NULL], expr#21=[SPAN($t10, $t19, $t20)], balance=[$t7], age0=[$t21]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) - + EnumerableAggregate(group=[{1}], max(balance)=[MAX($0)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[10], expr#20=[null:NULL], expr#21=[SPAN($t10, $t19, $t20)], balance=[$t7], age0=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java index c49f0344900..a29dc230949 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java @@ -80,7 +80,7 @@ 
public void testChartWithSingleGroupKey() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -92,7 +92,7 @@ public void testChartWithOverSyntax() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -104,21 +104,22 @@ public void testChartWithMultipleGroupKeys() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN `t6`.`__row_number__`" + "SELECT `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN `t7`.`__row_number__`" + " <= 10 THEN `t1`.`age` ELSE 'OTHER' END `age`, AVG(`t1`.`avg(balance)`)" + " `avg(balance)`\n" - + "FROM (SELECT AVG(`balance`) `avg(balance)`, `gender`, SAFE_CAST(`age` AS STRING)" - + " `age`\n" + + "FROM (SELECT `gender`, SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`)" + + " `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`, `age`) `t1`\n" + "LEFT JOIN (SELECT `age`, AVG(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + " (ORDER BY AVG(`avg(balance)`) DESC) `__row_number__`\n" - + "FROM (SELECT AVG(`balance`) `avg(balance)`, SAFE_CAST(`age` AS STRING) `age`\n" + + "FROM (SELECT SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`, `age`) `t4`\n" - + "GROUP BY `age`) `t6` ON `t1`.`age` = `t6`.`age`\n" + + "WHERE `age` IS NOT NULL\n" + + "GROUP BY `age`) `t7` ON `t1`.`age` = `t7`.`age`\n" + "GROUP BY `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN" - + " `t6`.`__row_number__` <= 10 THEN `t1`.`age` ELSE 'OTHER' END"; + + " `t7`.`__row_number__` <= 10 
THEN `t1`.`age` ELSE 'OTHER' END"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -128,21 +129,22 @@ public void testChartWithMultipleGroupKeysAlternativeSyntax() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN `t6`.`__row_number__`" + "SELECT `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN `t7`.`__row_number__`" + " <= 10 THEN `t1`.`age` ELSE 'OTHER' END `age`, AVG(`t1`.`avg(balance)`)" + " `avg(balance)`\n" - + "FROM (SELECT AVG(`balance`) `avg(balance)`, `gender`, SAFE_CAST(`age` AS STRING)" - + " `age`\n" + + "FROM (SELECT `gender`, SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`)" + + " `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`, `age`) `t1`\n" + "LEFT JOIN (SELECT `age`, AVG(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + " (ORDER BY AVG(`avg(balance)`) DESC) `__row_number__`\n" - + "FROM (SELECT AVG(`balance`) `avg(balance)`, SAFE_CAST(`age` AS STRING) `age`\n" + + "FROM (SELECT SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`, `age`) `t4`\n" - + "GROUP BY `age`) `t6` ON `t1`.`age` = `t6`.`age`\n" + + "WHERE `age` IS NOT NULL\n" + + "GROUP BY `age`) `t7` ON `t1`.`age` = `t7`.`age`\n" + "GROUP BY `t1`.`gender`, CASE WHEN `t1`.`age` IS NULL THEN 'NULL' WHEN" - + " `t6`.`__row_number__` <= 10 THEN `t1`.`age` ELSE 'OTHER' END"; + + " `t7`.`__row_number__` <= 10 THEN `t1`.`age` ELSE 'OTHER' END"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -152,7 +154,7 @@ public void testChartWithLimit() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -164,7 +166,7 @@ public void testChartWithLimitZero() { RelNode root = getRelNode(ppl); String expectedSparkSql = - 
"SELECT AVG(`balance`) `avg(balance)`, `state`, `gender`\n" + "SELECT `state`, `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `state`, `gender`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -176,7 +178,7 @@ public void testChartWithSpan() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT MAX(`balance`) `max(balance)`, `SPAN`(`age`, 10, NULL) `age`\n" + "SELECT `SPAN`(`age`, 10, NULL) `age`, MAX(`balance`) `max(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `SPAN`(`age`, 10, NULL)"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -189,20 +191,21 @@ public void testChartWithTimeSpan() { RelNode root = getRelNode(ppl); String expectedSparkSql = "SELECT `t1`.`timestamp`, CASE WHEN `t1`.`category` IS NULL THEN 'NULL' WHEN" - + " `t6`.`__row_number__` <= 10 THEN `t1`.`category` ELSE 'OTHER' END `category`," + + " `t7`.`__row_number__` <= 10 THEN `t1`.`category` ELSE 'OTHER' END `category`," + " MAX(`t1`.`max(value)`) `max(value)`\n" - + "FROM (SELECT MAX(`value`) `max(value)`, `SPAN`(`timestamp`, 1, 'w') `timestamp`," - + " `category`\n" + + "FROM (SELECT `SPAN`(`timestamp`, 1, 'w') `timestamp`, `category`, MAX(`value`)" + + " `max(value)`\n" + "FROM `scott`.`time_data`\n" + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t1`\n" + "LEFT JOIN (SELECT `category`, MAX(`max(value)`) `__grand_total__`, ROW_NUMBER() OVER" + " (ORDER BY MAX(`max(value)`) DESC) `__row_number__`\n" - + "FROM (SELECT MAX(`value`) `max(value)`, `category`\n" + + "FROM (SELECT `category`, MAX(`value`) `max(value)`\n" + "FROM `scott`.`time_data`\n" + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t4`\n" - + "GROUP BY `category`) `t6` ON `t1`.`category` = `t6`.`category`\n" + + "WHERE `category` IS NOT NULL\n" + + "GROUP BY `category`) `t7` ON `t1`.`category` = `t7`.`category`\n" + "GROUP BY `t1`.`timestamp`, CASE WHEN `t1`.`category` IS NULL THEN 'NULL' WHEN" - + " `t6`.`__row_number__` <= 10 THEN `t1`.`category` ELSE 'OTHER' 
END"; + + " `t7`.`__row_number__` <= 10 THEN `t1`.`category` ELSE 'OTHER' END"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -212,7 +215,7 @@ public void testChartWithUseOtherTrue() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -224,7 +227,7 @@ public void testChartWithUseOtherFalse() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -236,7 +239,7 @@ public void testChartWithOtherStr() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -248,7 +251,7 @@ public void testChartWithNullStr() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "GROUP BY `gender`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -260,7 +263,7 @@ public void testChartWithUseNull() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT AVG(`balance`) `avg(balance)`, `gender`\n" + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "WHERE `gender` IS NOT NULL\n" + "GROUP BY `gender`"; From 05c339ba1c11c0b0d6c6358a08c83104b47f4232 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 29 Oct 2025 20:39:17 +0800 Subject: [PATCH 22/23] Chores: tweak code order Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/calcite/CalciteRelNodeVisitor.java | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index eb73fbceea2..11b652244cb 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2137,12 +2137,12 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { SqlStdOperatorTable.LESS_THAN_OR_EQUAL, relBuilder.field(ROW_NUM_COL), relBuilder.literal(limit)); - RexNode nullCondition = relBuilder.isNull(colSplitPostJoin); - RexNode columnSplitExpr; if (!config.useOther) { relBuilder.filter(lteCondition); } + RexNode nullCondition = relBuilder.isNull(colSplitPostJoin); + RexNode columnSplitExpr; if (config.useNull) { columnSplitExpr = relBuilder.call( From be9063ff7d40f5ecc5e0d06ea27cef5e26e64eda Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 29 Oct 2025 22:21:59 +0800 Subject: [PATCH 23/23] Add anonymize test to chart command Signed-off-by: Yuanchun Shen --- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 37 +++++++++++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 28 ++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index e392a682cef..ea173f894a2 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -56,6 +56,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.DefaultBin; @@ -520,6 +521,42 @@ public String 
visitTimechart(Timechart node, String context) { return StringUtils.format("%s%s", child, timechartCommand.toString()); } + @Override + public String visitChart(Chart node, String context) { + String child = node.getChild().get(0).accept(this, context); + StringBuilder chartCommand = new StringBuilder(); + chartCommand.append(" | chart"); + + for (Argument arg : node.getArguments()) { + String argName = arg.getArgName(); + // Skip the auto-generated "top" parameter that's added when limit is specified + if ("top".equals(argName)) { + continue; + } + if ("limit".equals(argName) || "useother".equals(argName) || "usenull".equals(argName)) { + chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); + } else if ("otherstr".equals(argName) || "nullstr".equals(argName)) { + chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); + } + } + + chartCommand.append(" ").append(visitExpression(node.getAggregationFunction())); + + if (node.getRowSplit() != null && node.getColumnSplit() != null) { + chartCommand + .append(" by ") + .append(visitExpression(node.getRowSplit())) + .append(" ") + .append(visitExpression(node.getColumnSplit())); + } else if (node.getRowSplit() != null) { + chartCommand.append(" by ").append(visitExpression(node.getRowSplit())); + } else if (node.getColumnSplit() != null) { + chartCommand.append(" by ").append(visitExpression(node.getColumnSplit())); + } + + return StringUtils.format("%s%s", child, chartCommand.toString()); + } + public String visitRex(Rex node, String context) { String child = node.getChild().get(0).accept(this, context); String field = visitExpression(node.getField()); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 6de9acacfe1..811e3fb62de 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ 
b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -232,6 +232,34 @@ public void testTimechartCommand() { anonymize("source=t | timechart count() by host")); } + @Test + public void testChartCommand() { + assertEquals( + "source=table | chart count(identifier) by identifier identifier", + anonymize("source=t | chart count(age) by gender country")); + } + + @Test + public void testChartCommandWithParameters() { + assertEquals( + "source=table | chart limit=*** useother=*** avg(identifier) by identifier", + anonymize("source=t | chart limit=5 useother=false avg(balance) by state")); + } + + @Test + public void testChartCommandOver() { + assertEquals( + "source=table | chart avg(identifier) by identifier", + anonymize("source=t | chart avg(balance) over gender")); + } + + @Test + public void testChartCommandOverBy() { + assertEquals( + "source=table | chart sum(identifier) by identifier identifier", + anonymize("source=t | chart sum(amount) over gender by age")); + } + // todo, sort order is ignored, it doesn't impact the log analysis. @Test public void testSortCommandWithOptions() {