diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
index cf033eaa65d6..e40db65b33e2 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java
@@ -110,7 +110,7 @@ private static GroupByColumnSelectorStrategy forStringArrays()
     );
   }
 
-  private static class UniValueDimensionIdCodec implements DimensionIdCodec<Object>
+  public static class UniValueDimensionIdCodec implements DimensionIdCodec<Object>
   {
     /**
      * Dictionary for mapping the dimension value to an index. i-th position in the dictionary holds the value represented
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
index 64303770d623..32cd2999e5c8 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/MemoryFootprint.java
@@ -27,8 +27,7 @@ public class MemoryFootprint<T>
   private final T value;
   private final int footprintIncrease;
 
-  // Reduced visibility
-  MemoryFootprint(T value, int footprintIncrease)
+  public MemoryFootprint(T value, int footprintIncrease)
   {
     this.value = value;
     this.footprintIncrease = footprintIncrease;
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java
new file mode 100644
index 000000000000..24d101dafb57
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingComplexGroupByVectorColumnSelector.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.groupby.epinephelinae.vector;
+
+import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+/**
+ * Selector that groups complex columns using a dictionary.
+ *
+ * @see DictionaryBuildingSingleValueStringGroupByVectorColumnSelector similar selector for non-dict-encoded strings
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy#forType(ColumnType) which creates the nonvectorized version
+ */
+public class DictionaryBuildingComplexGroupByVectorColumnSelector
+    extends DictionaryBuildingGroupByVectorColumnSelector<Object>
+{
+  public DictionaryBuildingComplexGroupByVectorColumnSelector(
+      final VectorObjectSelector selector,
+      final ColumnType columnType
+  )
+  {
+    super(
+        selector,
+        new DictionaryBuildingGroupByColumnSelectorStrategy.UniValueDimensionIdCodec(columnType.getNullableStrategy())
+    );
+  }
+
+  @Override
+  protected Object convertValue(final Object rawValue)
+  {
+    return rawValue;
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java
new file mode 100644
index 000000000000..928e904df55a
--- /dev/null
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingGroupByVectorColumnSelector.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.groupby.epinephelinae.vector;
+
+import org.apache.datasketches.memory.WritableMemory;
+import org.apache.druid.query.groupby.ResultRow;
+import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
+import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.query.groupby.epinephelinae.column.DimensionIdCodec;
+import org.apache.druid.query.groupby.epinephelinae.column.MemoryFootprint;
+import org.apache.druid.segment.vector.VectorObjectSelector;
+
+/**
+ * Base class for {@link GroupByVectorColumnSelector} that build dictionaries for values that are not
+ * natively dictionary-encoded.
+ *
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy the nonvectorized version
+ */
+public abstract class DictionaryBuildingGroupByVectorColumnSelector<T> implements GroupByVectorColumnSelector
+{
+  protected final VectorObjectSelector selector;
+  protected final DimensionIdCodec<T> dimensionIdCodec;
+
+  protected DictionaryBuildingGroupByVectorColumnSelector(
+      final VectorObjectSelector selector,
+      final DimensionIdCodec<T> dimensionIdCodec
+  )
+  {
+    this.selector = selector;
+    this.dimensionIdCodec = dimensionIdCodec;
+  }
+
+  @Override
+  public final int getGroupingKeySize()
+  {
+    return Integer.BYTES;
+  }
+
+  @Override
+  public final int writeKeys(
+      final WritableMemory keySpace,
+      final int keySize,
+      final int keyOffset,
+      final int startRow,
+      final int endRow
+  )
+  {
+    final Object[] vector = selector.getObjectVector();
+    int stateFootprintIncrease = 0;
+
+    for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) {
+      final T value = convertValue(vector[i]);
+      final MemoryFootprint<Integer> idAndMemoryIncrease = dimensionIdCodec.lookupId(value);
+      keySpace.putInt(j, idAndMemoryIncrease.value());
+      stateFootprintIncrease += idAndMemoryIncrease.memoryIncrease();
+    }
+
+    return stateFootprintIncrease;
+  }
+
+  @Override
+  public final void writeKeyToResultRow(
+      final MemoryPointer keyMemory,
+      final int keyOffset,
+      final ResultRow resultRow,
+      final int resultRowPosition
+  )
+  {
+    final int id = keyMemory.memory().getInt(keyMemory.position() + keyOffset);
+    final T value = dimensionIdCodec.idToKey(id);
+    resultRow.set(resultRowPosition, value);
+  }
+
+  @Override
+  public final void reset()
+  {
+    dimensionIdCodec.reset();
+  }
+
+  /**
+   * Convert raw value from the vector to the appropriate type for this selector.
+   */
+  protected abstract T convertValue(Object rawValue);
+}
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
index 0c4640c85f92..301821d6a6cf 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java
@@ -19,12 +19,14 @@
 
 package org.apache.druid.query.groupby.epinephelinae.vector;
 
+import it.unimi.dsi.fastutil.objects.Object2IntMap;
 import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
-import org.apache.datasketches.memory.WritableMemory;
-import org.apache.druid.query.groupby.ResultRow;
 import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils;
-import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
+import org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy;
+import org.apache.druid.query.groupby.epinephelinae.column.DimensionIdCodec;
+import org.apache.druid.query.groupby.epinephelinae.column.MemoryFootprint;
 import org.apache.druid.segment.DimensionHandlerUtils;
+import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.vector.VectorObjectSelector;
 
 import java.util.ArrayList;
@@ -34,85 +36,65 @@
  * A {@link GroupByVectorColumnSelector} that builds an internal String<->Integer dictionary, used for grouping
  * single-valued STRING columns which are not natively dictionary encoded, e.g. expression virtual columns.
  *
- * This is effectively the {@link VectorGroupByEngine} analog of
- * {@link org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy} for
- * String columns
+ * @see DictionaryBuildingComplexGroupByVectorColumnSelector similar selector for complex columns
+ * @see DictionaryBuildingGroupByColumnSelectorStrategy#forType(ColumnType) which creates the nonvectorized version
  */
-public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector
+public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector
+    extends DictionaryBuildingGroupByVectorColumnSelector<String>
 {
-  private static final int GROUP_BY_MISSING_VALUE = -1;
-
-  private final VectorObjectSelector selector;
-
-  private final List<String> dictionary = new ArrayList<>();
-  private final Object2IntOpenHashMap<String> reverseDictionary = new Object2IntOpenHashMap<>();
-
-  public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(VectorObjectSelector selector)
+  public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(final VectorObjectSelector selector)
   {
-    this.selector = selector;
-    this.reverseDictionary.defaultReturnValue(-1);
+    super(selector, new StringDimensionIdCodec());
   }
 
   @Override
-  public int getGroupingKeySize()
+  protected String convertValue(final Object rawValue)
   {
-    return Integer.BYTES;
+    return DimensionHandlerUtils.convertObjectToString(rawValue);
   }
 
-  @Override
-  public int writeKeys(
-      final WritableMemory keySpace,
-      final int keySize,
-      final int keyOffset,
-      final int startRow,
-      final int endRow
-  )
+  private static class StringDimensionIdCodec implements DimensionIdCodec<String>
   {
-    final Object[] vector = selector.getObjectVector();
-    int stateFootprintIncrease = 0;
+    private final List<String> dictionary = new ArrayList<>();
+    private final Object2IntMap<String> reverseDictionary = new Object2IntOpenHashMap<>();
 
-    for (int i = startRow, j = keyOffset; i < endRow; i++, j += keySize) {
-      final String value = DimensionHandlerUtils.convertObjectToString(vector[i]);
-      final int dictId = reverseDictionary.getInt(value);
+    StringDimensionIdCodec()
+    {
+      reverseDictionary.defaultReturnValue(-1);
+    }
+
+    @Override
+    public MemoryFootprint<Integer> lookupId(final String value)
+    {
+      int dictId = reverseDictionary.getInt(value);
+      int footprintIncrease = 0;
       if (dictId < 0) {
-        final int nextId = dictionary.size();
+        dictId = dictionary.size();
         dictionary.add(value);
-        reverseDictionary.put(value, nextId);
-        keySpace.putInt(j, nextId);
-
-        // Use same ROUGH_OVERHEAD_PER_DICTIONARY_ENTRY as the nonvectorized version; dictionary structure is the same.
-        stateFootprintIncrease +=
-            DictionaryBuildingUtils.estimateEntryFootprint((value == null ? 0 : value.length()) * Character.BYTES);
-      } else {
-        keySpace.putInt(j, dictId);
+        reverseDictionary.put(value, dictId);
+        footprintIncrease =
+            DictionaryBuildingUtils.estimateEntryFootprint(value == null ? 0 : value.length() * Character.BYTES);
       }
+      return new MemoryFootprint<>(dictId, footprintIncrease);
     }
 
-    return stateFootprintIncrease;
-  }
+    @Override
+    public String idToKey(final int id)
+    {
+      return dictionary.get(id);
+    }
 
-  @Override
-  public void writeKeyToResultRow(
-      final MemoryPointer keyMemory,
-      final int keyOffset,
-      final ResultRow resultRow,
-      final int resultRowPosition
-  )
-  {
-    final int id = keyMemory.memory().getInt(keyMemory.position() + keyOffset);
-    // GROUP_BY_MISSING_VALUE is used to indicate empty rows, which are omitted from the result map.
-    if (id != GROUP_BY_MISSING_VALUE) {
-      final String value = dictionary.get(id);
-      resultRow.set(resultRowPosition, value);
-    } else {
-      resultRow.set(resultRowPosition, null);
+    @Override
+    public boolean canCompareIds()
+    {
+      return false;
     }
-  }
 
-  @Override
-  public void reset()
-  {
-    dictionary.clear();
-    reverseDictionary.clear();
+    @Override
+    public void reset()
+    {
+      dictionary.clear();
+      reverseDictionary.clear();
+    }
   }
 }
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
index 9c73d0714087..2721ce75277f 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/GroupByVectorColumnProcessorFactory.java
@@ -127,8 +127,11 @@ public GroupByVectorColumnSelector makeObjectProcessor(
         );
       }
       return new DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(selector);
+    } else if (capabilities.is(ValueType.COMPLEX)) {
+      return new DictionaryBuildingComplexGroupByVectorColumnSelector(selector, capabilities.toColumnType());
+    } else {
+      return NilGroupByVectorColumnSelector.INSTANCE;
     }
-    return NilGroupByVectorColumnSelector.INSTANCE;
   }
 
   /**
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
index 957a5f4189e8..f63dd862c4fa 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
@@ -185,8 +185,8 @@ public static boolean canVectorizeDimensions(
         return false;
       }
 
-      if (!dimension.getOutputType().isPrimitive()) {
-        // group by on arrays and complex types is not currently supported in the vector processing engine
+      if (dimension.getOutputType().isArray()) {
+        // group by on arrays is not currently supported in the vector processing engine
         return false;
       }
 
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 8d04c66ec11b..678f49287d25 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -52,6 +52,7 @@
 import org.apache.druid.query.filter.LikeDimFilter;
 import org.apache.druid.query.groupby.GroupByQuery;
 import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
+import org.apache.druid.query.groupby.orderby.NoopLimitSpec;
 import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
 import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.query.scan.ScanQuery;
@@ -75,6 +76,7 @@
 import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
 import org.apache.druid.sql.calcite.CalciteNestedDataQueryTest.NestedComponentSupplier;
 import org.apache.druid.sql.calcite.filtration.Filtration;
+import org.apache.druid.sql.calcite.run.EngineFeature;
 import org.apache.druid.sql.calcite.util.SqlTestFramework.StandardComponentSupplier;
 import org.apache.druid.sql.calcite.util.TestDataBuilder;
 import org.apache.druid.timeline.DataSegment;
@@ -619,33 +621,24 @@ public void testTopNPath()
   @Test
   public void testGroupByOnNestedColumn()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
-                            )
-                        )
                         .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0")))
+                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "long")))
                         .setContext(QUERY_CONTEXT_DEFAULT)
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L},
-            new Object[]{"2", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L},
+            new Object[]{"2", 2L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 5L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
         )
     );
   }
@@ -653,33 +646,38 @@ public void testGroupByOnNestedColumn()
   @Test
   public void testGroupByOnNestedColumnWithOrderBy()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
+                        .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
+                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "long")))
+                        .setLimitSpec(
+                            queryFramework().engine().featureAvailable(EngineFeature.GROUPBY_IMPLICITLY_SORTS)
+                            ? NoopLimitSpec.instance()
+                            : new DefaultLimitSpec(
+                                ImmutableList.of(
+                                    new OrderByColumnSpec(
+                                        "d0",
+                                        OrderByColumnSpec.Direction.ASCENDING,
+                                        StringComparators.NATURAL
+                                    )
+                                ),
+                                Integer.MAX_VALUE
                             )
                         )
-                        .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0")))
                         .setContext(QUERY_CONTEXT_DEFAULT)
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L},
-            new Object[]{"2", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L},
+            new Object[]{"2", 2L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 5L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
         )
     );
   }
@@ -687,24 +685,15 @@ public void testGroupByOnNestedColumnWithOrderBy()
   @Test
   public void testGroupByOnNestedColumnWithOrderByAndLimit()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 ORDER BY 1 LIMIT 100",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1 LIMIT 100",
         ImmutableList.of(
             GroupByQuery.builder()
                         .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
-                            )
-                        )
                         .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0")))
+                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "long")))
                         .setLimitSpec(new DefaultLimitSpec(
                             ImmutableList.of(new OrderByColumnSpec(
                                 "d0",
@@ -717,11 +706,11 @@ public void testGroupByOnNestedColumnWithOrderByAndLimit()
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L},
-            new Object[]{"2", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 3L},
-            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L}
+            new Object[]{null, 6L},
+            new Object[]{"\"hello\"", 4L},
+            new Object[]{"2", 2L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 5L},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L}
         )
     );
   }
@@ -729,24 +718,15 @@ public void testGroupByOnNestedColumnWithOrderByAndLimit()
   @Test
   public void testGroupByOnNestedColumnWithOrderByAndLimit2()
   {
-    cannotVectorize();
     testQuery(
-        "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 ORDER BY 1 LIMIT 2",
+        "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 ORDER BY 1 LIMIT 2",
         ImmutableList.of(
             GroupByQuery.builder()
                        .setDataSource(DATA_SOURCE)
                         .setInterval(querySegmentSpec(Filtration.eternity()))
                         .setGranularity(Granularities.ALL)
-                        .setVirtualColumns(
-                            new ExpressionVirtualColumn(
-                                "v0",
-                                "strlen(\"string\")",
-                                ColumnType.LONG,
-                                queryFramework().macroTable()
-                            )
-                        )
                         .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA)))
-                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0")))
+                        .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "long")))
                         .setLimitSpec(new DefaultLimitSpec(
                             ImmutableList.of(new OrderByColumnSpec(
                                 "d0",
@@ -759,8 +739,8 @@ public void testGroupByOnNestedColumnWithOrderByAndLimit2()
                         .build()
         ),
         ImmutableList.of(
-            new Object[]{null, 9L},
-            new Object[]{"\"hello\"", 3L}
new Object[]{"\"hello\"", 3L} + new Object[]{null, 6L}, + new Object[]{"\"hello\"", 4L} ) ); } @@ -768,34 +748,25 @@ public void testGroupByOnNestedColumnWithOrderByAndLimit2() @Test public void testGroupByOnNestedColumnWithLimit() { - cannotVectorize(); testQuery( - "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 LIMIT 100", + "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 LIMIT 100", ImmutableList.of( GroupByQuery.builder() .setDataSource(DATA_SOURCE) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setVirtualColumns( - new ExpressionVirtualColumn( - "v0", - "strlen(\"string\")", - ColumnType.LONG, - queryFramework().macroTable() - ) - ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) - .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "long"))) .setLimitSpec(new DefaultLimitSpec(null, 100)) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), ImmutableList.of( - new Object[]{null, 9L}, - new Object[]{"\"hello\"", 3L}, - new Object[]{"2", 3L}, - new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 3L}, - new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 3L} + new Object[]{null, 6L}, + new Object[]{"\"hello\"", 4L}, + new Object[]{"2", 2L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}", 5L}, + new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}", 5L} ) ); } @@ -803,31 +774,22 @@ public void testGroupByOnNestedColumnWithLimit() @Test public void testGroupByOnNestedColumnWithLimit2() { - cannotVectorize(); testQuery( - "SELECT nester, SUM(strlen(string)) FROM druid.nested GROUP BY 1 LIMIT 2", + "SELECT nester, SUM(\"long\") FROM druid.nested GROUP BY 1 LIMIT 2", ImmutableList.of( GroupByQuery.builder() .setDataSource(DATA_SOURCE) .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) - .setVirtualColumns( - new ExpressionVirtualColumn( - "v0", - "strlen(\"string\")", - ColumnType.LONG, - queryFramework().macroTable() - ) - ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) - .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "long"))) .setLimitSpec(new DefaultLimitSpec(null, 2)) .setContext(QUERY_CONTEXT_DEFAULT) .build() ), ImmutableList.of( - new Object[]{null, 9L}, - new Object[]{"\"hello\"", 3L} + new Object[]{null, 6L}, + new Object[]{"\"hello\"", 4L} ) ); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index edd78ae2007e..269f58633741 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -751,7 +751,6 @@ public void testEarliestAggregators() @Test public void testGroupingOnStringSerializablePairLongString() { - cannotVectorize(); testQuery( "SELECT COUNT(*) FROM (SELECT string_first_added FROM druid.wikipedia_first_last GROUP BY 1)", ImmutableList.of( @@ -783,7 +782,6 @@ public void testGroupingOnStringSerializablePairLongString() @Test public void testGroupingOnStringSerializablePairLongLong() { - cannotVectorize(); testQuery( "SELECT COUNT(*) FROM (SELECT long_first_added FROM druid.wikipedia_first_last GROUP BY 1)", ImmutableList.of( @@ -815,7 +813,6 @@ 
   @Test
   public void testGroupingOnStringSerializablePairLongDouble()
   {
-    cannotVectorize();
     testQuery(
         "SELECT COUNT(*) FROM (SELECT double_first_added FROM druid.wikipedia_first_last GROUP BY 1)",
         ImmutableList.of(
@@ -847,7 +844,6 @@ public void testGroupingOnStringSerializablePairLongDouble()
   @Test
   public void testGroupingOnStringSerializablePairLongFloat()
   {
-    cannotVectorize();
     testQuery(
         "SELECT COUNT(*) FROM (SELECT float_first_added FROM druid.wikipedia_first_last GROUP BY 1)",
         ImmutableList.of(