From afbfcd61e9bd6b1521b76c54f6979eef3aaf0f5c Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 30 Sep 2025 15:53:13 -0700 Subject: [PATCH 01/19] derive-json --- .../druid/segment/column/ColumnConfig.java | 13 + .../CompressedNestedDataComplexColumn.java | 298 ++++----- .../nested/NestedDataComplexColumn.java | 6 +- .../segment/nested/StructuredDataBuilder.java | 174 +++++ .../nested/StructuredDataBuilderTest.java | 203 ++++++ .../resources/nested-array-test-data.json | 4 +- .../calcite/CalciteNestedDataQueryTest.java | 631 +++++++++++++++--- 7 files changed, 1078 insertions(+), 251 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/segment/nested/StructuredDataBuilder.java create mode 100644 processing/src/test/java/org/apache/druid/segment/nested/StructuredDataBuilderTest.java diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java index a4bdfcfd6fa4..99712b8c9438 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java @@ -32,6 +32,14 @@ public interface ColumnConfig ColumnConfig DEFAULT = new ColumnConfig() {}; + ColumnConfig READ_RAW_JSON = new ColumnConfig() + { + @Override + public boolean deriveJsonColumnFromIndexes() + { + return false; + } + }; /** * Use range indexes if dictionary range is same size or smaller than selection size */ @@ -74,4 +82,9 @@ default double skipValueRangeIndexScale() { return DEFAULT_SKIP_VALUE_RANGE_INDEX_SCALE; } + + default boolean deriveJsonColumnFromIndexes() + { + return true; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index bb9ed6254627..9ebf475da8c6 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -19,9 +19,9 @@ package org.apache.druid.segment.nested; -import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import com.google.common.primitives.Doubles; import org.apache.druid.collections.bitmap.ImmutableBitmap; @@ -29,6 +29,7 @@ import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.Closer; @@ -84,16 +85,18 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; +import java.util.stream.Collectors; /** * Implementation of {@link NestedDataComplexColumn} which uses a {@link CompressedVariableSizedBlobColumn} for the @@ -119,7 +122,7 @@ public abstract class CompressedNestedDataComplexColumn fields; + private final LinkedHashMap, Pair> fieldPathMap; private final FieldTypeInfo fieldInfo; private final Supplier stringDictionarySupplier; private final Supplier> longDictionarySupplier; @@ -156,7 +159,11 @@ public CompressedNestedDataComplexColumn( this.columnName = columnName; this.logicalType = logicalType; this.nullValues = nullValues; - this.fields = fields; + this.fieldPathMap = new LinkedHashMap<>(fields.size()); + for (int i = 0; i < fields.size(); i++) { + String field = fields.get(i); + fieldPathMap.put(parsePath(field), Pair.of(fields.get(i), i)); + } this.fieldInfo = fieldInfo; this.stringDictionarySupplier = stringDictionary; this.longDictionarySupplier = longDictionarySupplier; @@ -181,10 +188,9 @@ public CompressedNestedDataComplexColumn( public SortedMap getFieldTypeInfo() { SortedMap fieldMap = new TreeMap<>(); - for (int i = 0; i < fields.size(); i++) { - String fieldPath = fields.get(i); - FieldTypeInfo.TypeSet types = fieldInfo.getTypes(i); - fieldMap.put(fieldPath, new FieldTypeInfo.MutableTypeSet(types.getByteValue())); + for (Pair field : fieldPathMap.values()) { + FieldTypeInfo.TypeSet types = fieldInfo.getTypes(field.rhs); + fieldMap.put(field.lhs, new FieldTypeInfo.MutableTypeSet(types.getByteValue())); } return fieldMap; } @@ -198,14 +204,9 @@ public ColumnType getLogicalType() @Override public List> getNestedFields() { - List> fieldParts = new ArrayList<>(fields.size()); - for (int i = 0; i < fields.size(); i++) { - fieldParts.add(parsePath(fields.get(i))); - } - return fieldParts; + return ImmutableList.copyOf(fieldPathMap.keySet()); } - public TStringDictionary getUtf8BytesDictionary() { return stringDictionarySupplier.get(); @@ -338,15 +339,25 @@ public Object getRowValue(int rowNum) @Override public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) { - if (!logicalType.equals(ColumnType.NESTED_DATA) && fields.size() == 1 && rootFieldPath.equals(fields.get(0))) { + if (!logicalType.equals(ColumnType.NESTED_DATA) + && fieldPathMap.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(fieldPathMap.values()).lhs)) { return makeColumnValueSelector( ImmutableList.of(), null /* not used */, offset ); } - if (compressedRawColumn == null) { - compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + final List, BaseColumn>> fieldColumns; + if (columnConfig.deriveJsonColumnFromIndexes()) { + fieldColumns = fieldPathMap.keySet().stream() + .map(path -> Pair.of(path, Objects.requireNonNull(getColumnHolder(path)).getColumn())) + .collect(Collectors.toList()); + } else { + fieldColumns = null; + if (compressedRawColumn == null) { + compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + } } return new ObjectColumnSelector() @@ -358,8 +369,16 @@ public Object getObject() if (nullValues.get(offset.getOffset())) { return null; } - final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); - return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + if (columnConfig.deriveJsonColumnFromIndexes()) { + List elements = fieldColumns + .stream() + .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.makeColumnValueSelector(offset).getObject())) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); + } else { + final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); + return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + } } @Override @@ -379,7 +398,9 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) { - if (!logicalType.equals(ColumnType.NESTED_DATA) && fields.size() == 1 && rootFieldPath.equals(fields.get(0))) { + if (!logicalType.equals(ColumnType.NESTED_DATA) + && fieldPathMap.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(fieldPathMap.values()).lhs)) { return makeVectorObjectSelector( Collections.emptyList(), null /* not used */, @@ -451,7 +472,9 @@ public int getMaxVectorSize() @Override public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) { - if (!logicalType.equals(ColumnType.NESTED_DATA) && fields.size() == 1 && rootFieldPath.equals(fields.get(0))) { + if (!logicalType.equals(ColumnType.NESTED_DATA) + && fieldPathMap.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(fieldPathMap.values()).lhs)) { return makeVectorValueSelector( Collections.emptyList(), null /* not used */, @@ -491,23 +514,25 @@ public DimensionSelector makeDimensionSelector( ReadableOffset readableOffset ) { - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(field); - if (fieldIndex >= 0) { - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(field, fieldIndex).getColumn(); + final Pair field = fieldPathMap.get(path); + if (field != null) { + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(field.lhs, field.rhs).getColumn(); return col.makeDimensionSelector(readableOffset, extractionFn); } if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final NestedPathPart lastPath = path.get(path.size() - 1); - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(arrayField); - if (arrayFieldIndex >= 0) { - final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); + final Pair arrayField = fieldPathMap.get(path.subList(0, path.size() - 1)); + if (arrayField != null) { + final int elementNumber = ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex(); if (elementNumber < 0) { - throw new IAE("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); + throw new IAE( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(arrayField, arrayFieldIndex).getColumn(); + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + arrayField.lhs, + arrayField.rhs + ).getColumn(); ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); return new BaseSingleValueDimensionSelector() { @@ -554,29 +579,23 @@ public ColumnValueSelector makeColumnValueSelector( ReadableOffset readableOffset ) { - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - - final int fieldIndex = fields.indexOf(field); - if (fieldIndex >= 0) { - BaseColumn col = getColumnHolder(field, fieldIndex).getColumn(); - return col.makeColumnValueSelector(readableOffset); + BaseColumnHolder columnHolder = getColumnHolder(path); + if (columnHolder != null) { + return columnHolder.getColumn().makeColumnValueSelector(readableOffset); } if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final NestedPathPart lastPath = path.get(path.size() - 1); - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(arrayField); - if (arrayFieldIndex >= 0) { - final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); + final BaseColumnHolder arrayColumnHolder = getColumnHolder(path.subList(0, path.size() - 1)); + if (arrayColumnHolder != null) { + final int elementNumber = ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex(); if (elementNumber < 0) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField, - arrayFieldIndex - ).getColumn(); + DictionaryEncodedColumn col = (DictionaryEncodedColumn) arrayColumnHolder.getColumn(); ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); return new ColumnValueSelector<>() { @@ -646,11 +665,9 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector ReadableVectorOffset readableOffset ) { - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(field); - if (fieldIndex >= 0) { - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(field, fieldIndex).getColumn(); + final ColumnHolder columnHolder = getColumnHolder(path); + if (columnHolder != null) { + DictionaryEncodedColumn col = (DictionaryEncodedColumn) columnHolder.getColumn(); return col.makeSingleValueDimensionVectorSelector(readableOffset); } else { return NilVectorSelector.create(readableOffset); @@ -671,29 +688,23 @@ public VectorObjectSelector makeVectorObjectSelector( ReadableVectorOffset readableOffset ) { - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(field); - if (fieldIndex >= 0) { - BaseColumn col = getColumnHolder(field, fieldIndex).getColumn(); - return col.makeVectorObjectSelector(readableOffset); + final BaseColumnHolder columnHolder = getColumnHolder(path); + if (columnHolder != null) { + return columnHolder.getColumn().makeVectorObjectSelector(readableOffset); } if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final NestedPathPart lastPath = path.get(path.size() - 1); - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(arrayField); - if (arrayFieldIndex >= 0) { - final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); + final BaseColumnHolder arrayColumnHolder = getColumnHolder(path.subList(0, path.size() - 1)); + if (arrayColumnHolder != null) { + final int elementNumber = ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex(); if (elementNumber < 0) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField, - arrayFieldIndex - ).getColumn(); - VectorObjectSelector arraySelector = col.makeVectorObjectSelector(readableOffset); + VectorObjectSelector arraySelector = arrayColumnHolder.getColumn().makeVectorObjectSelector(readableOffset); return new VectorObjectSelector() { @@ -755,29 +766,23 @@ public VectorValueSelector makeVectorValueSelector( ReadableVectorOffset readableOffset ) { - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(field); - if (fieldIndex >= 0) { - BaseColumn col = getColumnHolder(field, fieldIndex).getColumn(); - return col.makeVectorValueSelector(readableOffset); + final BaseColumnHolder columnHolder = getColumnHolder(path); + if (columnHolder != null) { + return columnHolder.getColumn().makeVectorValueSelector(readableOffset); } if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final NestedPathPart lastPath = path.get(path.size() - 1); - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(arrayField); - if (arrayFieldIndex >= 0) { - final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); + final BaseColumnHolder arrayColumnHolder = getColumnHolder(path.subList(0, path.size() - 1)); + if (arrayColumnHolder != null) { + final int elementNumber = ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex(); if (elementNumber < 0) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField, - arrayFieldIndex - ).getColumn(); - VectorObjectSelector arraySelector = col.makeVectorObjectSelector(readableOffset); + VectorObjectSelector arraySelector = arrayColumnHolder.getColumn().makeVectorObjectSelector(readableOffset); return new VectorValueSelector() { @@ -894,87 +899,81 @@ public int getCurrentVectorSize() @Override public Set getFieldTypes(List path) { - String field = getField(path); - int index = fields.indexOf(field); - // if index is negative, check for an array element accessor in the path - if (index < 0) { - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final String arrayField = getField(path.subList(0, path.size() - 1)); - index = fields.indexOf(arrayField); - } - if (index < 0) { - return null; - } - final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(index).getByteValue()); - final Set elementTypes = Sets.newHashSetWithExpectedSize(arrayFieldTypes.size()); - for (ColumnType type : arrayFieldTypes) { - if (type.isArray()) { - elementTypes.add((ColumnType) type.getElementType()); - } else { - elementTypes.add(type); + final Pair field = fieldPathMap.get(path); + if (field != null) { + return FieldTypeInfo.convertToSet(fieldInfo.getTypes(field.rhs).getByteValue()); + } else if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { + final Pair arrayField = fieldPathMap.get(path.subList(0, path.size() - 1)); + if (arrayField != null) { + final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.rhs) + .getByteValue()); + final Set elementTypes = Sets.newHashSetWithExpectedSize(arrayFieldTypes.size()); + for (ColumnType type : arrayFieldTypes) { + if (type.isArray()) { + elementTypes.add((ColumnType) type.getElementType()); + } else { + elementTypes.add(type); + } } + return elementTypes; } - return elementTypes; } - return FieldTypeInfo.convertToSet(fieldInfo.getTypes(index).getByteValue()); + return null; } @Nullable @Override public ColumnType getFieldLogicalType(List path) { - final String field = getField(path); - final Set fieldTypes; - int index = fields.indexOf(field); - ColumnType leastRestrictiveType = null; - if (index < 0) { - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final String arrayField = getField(path.subList(0, path.size() - 1)); - index = fields.indexOf(arrayField); - } - if (index < 0) { - return null; - } - fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(index).getByteValue()); - for (ColumnType type : fieldTypes) { - if (type.isArray()) { - leastRestrictiveType = ColumnType.leastRestrictiveType( - leastRestrictiveType, - (ColumnType) type.getElementType() - ); - } else { - leastRestrictiveType = ColumnType.leastRestrictiveType(leastRestrictiveType, type); + final Pair field = fieldPathMap.get(path); + if (field != null) { + final Set fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(field.rhs).getByteValue()); + return ColumnType.leastRestrictiveType(fieldTypes); + } else if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { + final Pair arrayField = fieldPathMap.get(path.subList(0, path.size() - 1)); + if (arrayField != null) { + final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.rhs) + .getByteValue()); + ColumnType leastRestrictiveType = null; + for (ColumnType type : arrayFieldTypes) { + if (type.isArray()) { + leastRestrictiveType = ColumnType.leastRestrictiveType( + leastRestrictiveType, + (ColumnType) type.getElementType() + ); + } else { + leastRestrictiveType = ColumnType.leastRestrictiveType(leastRestrictiveType, type); + } } + return leastRestrictiveType; } - } else { - fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(index).getByteValue()); - leastRestrictiveType = ColumnType.leastRestrictiveType(fieldTypes); } - return leastRestrictiveType; + return null; + } @Nullable @Override - public ColumnHolder getColumnHolder(List path) + public BaseColumnHolder getColumnHolder(List path) { - final String field = getField(path); - final int fieldIndex = fields.indexOf(field); - return getColumnHolder(field, fieldIndex); + final Pair field = fieldPathMap.get(path); + if (field == null) { + return null; + } + return getColumnHolder(field.lhs, field.rhs); } @Nullable @Override public ColumnIndexSupplier getColumnIndexSupplier(List path) { - final String field = getField(path); - int fieldIndex = fields.indexOf(field); - if (fieldIndex >= 0) { - return getColumnHolder(field, fieldIndex).getIndexSupplier(); + final ColumnHolder columnHolder = getColumnHolder(path); + if (columnHolder != null) { + return columnHolder.getIndexSupplier(); } if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(arrayField); - if (arrayFieldIndex >= 0) { + final ColumnHolder arrayColumnHolder = getColumnHolder(path.subList(0, path.size() - 1)); + if (arrayColumnHolder != null) { return NoIndexesColumnIndexSupplier.getInstance(); } } @@ -984,12 +983,11 @@ public ColumnIndexSupplier getColumnIndexSupplier(List path) @Override public boolean isNumeric(List path) { - final String field = getField(path); - final int fieldIndex = fields.indexOf(field); - if (fieldIndex < 0) { - return true; + final ColumnHolder columnHolder = getColumnHolder(path); + if (columnHolder != null) { + return columnHolder.getCapabilities().isNumeric(); } - return getColumnHolder(field, fieldIndex).getCapabilities().isNumeric(); + return true; } @SuppressWarnings("unchecked") diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java index 7f584a980698..17d782785a8e 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java @@ -20,7 +20,7 @@ package org.apache.druid.segment.nested; -import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.BaseColumnHolder; import org.apache.druid.segment.column.ComplexColumn; import javax.annotation.Nullable; @@ -40,10 +40,10 @@ public abstract class NestedDataComplexColumn implements NestedVectorColumnSelectorFactory { /** - * Get a {@link ColumnHolder} for a nested field column to retrieve metadata, the column itself, or indexes. + * Get a {@link BaseColumnHolder} for a nested field column to retrieve metadata, the column itself, or indexes. */ @Nullable - public abstract ColumnHolder getColumnHolder(List path); + public abstract BaseColumnHolder getColumnHolder(List path); @Override public Class getClazz() diff --git a/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataBuilder.java b/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataBuilder.java new file mode 100644 index 000000000000..584ca24d18c0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataBuilder.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.google.common.collect.LinkedListMultimap; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import org.apache.druid.error.DruidException; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class StructuredDataBuilder +{ + + private final List elements; + + StructuredDataBuilder(StructuredDataBuilder.Element... elements) + { + this(List.of(elements)); + } + + StructuredDataBuilder(List elements) + { + this.elements = elements; + } + + /** + * Creates a StructuredDataBuilder from a list of paths and corresponding objects. + */ + StructuredDataBuilder(List> parts, List objects) + { + List elements = new ArrayList<>(); + for (int i = 0; i < parts.size(); i++) { + elements.add(Element.of(parts.get(i), objects.get(i))); + } + this.elements = elements; + } + + public StructuredData build() + { + Object subtree = buildObject(); + return StructuredData.wrap(subtree == null ? Map.of() : subtree); + } + + @Nullable + private Object buildObject() + { + Object simpleObject = null; + Multimap map = LinkedListMultimap.create(); + ArrayList> list = new ArrayList<>(); + + for (Element element : elements) { + if (element.getValue() == null) { + // we can't distinguish between null and missing values in structured data + continue; + } + + if (element.endOfPath()) { + simpleObject = element.getValue(); + continue; + } + + NestedPathPart currentPath = element.getCurrentPath(); + if (currentPath instanceof NestedPathField) { + map.put(((NestedPathField) currentPath).getField(), element.next()); + } else if (currentPath instanceof NestedPathArrayElement) { + int index = ((NestedPathArrayElement) currentPath).getIndex(); + while (list.size() <= index) { + list.add(new ArrayList<>()); + } + list.get(index).add(element.next()); + } + } + + if (simpleObject != null) { + if (!(map.isEmpty() && list.isEmpty())) { + throw DruidException.defensive( + "Error building structured data from paths[%s], cannot have map or array elements when root value is set", + elements + ); + } + return simpleObject; + } else if (!map.isEmpty()) { + if (!list.isEmpty()) { + throw DruidException.defensive( + "Error building structured data from paths[%s], cannot have both map and array elements at the same level", + elements + ); + } + return Maps.transformValues( + map.asMap(), + (mapElements) -> new StructuredDataBuilder(new ArrayList<>(mapElements)).buildObject() + ); + } else if (!list.isEmpty()) { + List resultList = new ArrayList<>(list.size()); + for (List elementList : list) { + resultList.add(new StructuredDataBuilder(elementList).buildObject()); + } + return resultList; + } + return null; + } + + public static class Element + { + final List path; + @Nullable + final Object value; + final int depth; + + Element(List path, Object value, int depth) + { + this.path = path; + this.value = value; + this.depth = depth; + } + + static Element of(List path, Object value) + { + return new Element(path, value, 0); + } + + @Nullable + Object getValue() + { + return value; + } + + NestedPathPart getCurrentPath() + { + return path.get(depth); + } + + boolean endOfPath() + { + return path.size() == depth; + } + + Element next() + { + return new Element(path, value, depth + 1); + } + + @Override + public String toString() + { + return "Element{" + + "path=" + path + + ", value=" + value + + ", depth=" + depth + + '}'; + } + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataBuilderTest.java b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataBuilderTest.java new file mode 100644 index 000000000000..fa49e1104dc6 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataBuilderTest.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import org.apache.druid.error.DruidException; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class StructuredDataBuilderTest +{ + @Test + public void testBuildSingleDepth() + { + Object[] array = new Object[]{1, 2}; + StructuredDataBuilder.Element childArrayElement = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(1)), + array, + 1 + ); + Assert.assertEquals(StructuredData.wrap(array), new StructuredDataBuilder(childArrayElement).build()); + + // [null, [1, 2]] + StructuredDataBuilder.Element arrayElement = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(1)), + array, + 0 + ); + Assert.assertEquals( + StructuredData.wrap(Arrays.asList(null, array)), + new StructuredDataBuilder(arrayElement).build() + ); + + StructuredDataBuilder.Element nullElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("y")), + null, + 0 + ); + Assert.assertEquals(StructuredData.wrap(Map.of()), new StructuredDataBuilder(nullElement).build()); + + // {"x": "hi"} + StructuredDataBuilder.Element mapElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x")), + "hi", + 0 + ); + Assert.assertEquals( + StructuredData.wrap(Map.of("x", "hi")), + new StructuredDataBuilder(mapElement, nullElement).build() + ); + } + + @Test + public void testBuildRootPath() + { + // "root-val" + StructuredDataBuilder.Element rootElement = new StructuredDataBuilder.Element( + List.of(), + "root-val", + 0 + ); + Assert.assertEquals(StructuredData.wrap("root-val"), new StructuredDataBuilder(rootElement).build()); + } + + @Test + public void testBuildArrayMultipleDepths() + { + // [[1], [null, [2]]] + Object[] array = new Object[]{2}; + StructuredDataBuilder.Element element1 = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(0), new NestedPathArrayElement(0)), + 1, + 0 + ); + StructuredDataBuilder.Element element2 = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(1), new NestedPathArrayElement(1)), + array, + 0 + ); + List expected = List.of(List.of(1), Arrays.asList(null, array)); + Assert.assertEquals(StructuredData.wrap(expected), new StructuredDataBuilder(element1, element2).build()); + } + + @Test + public void testBuildMapMultipleDepths() + { + // {"x": {"y": "hi-xy", "z": "hi-xz"}, "yz": {"z": "hi-yz"}} + StructuredDataBuilder.Element xyElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("y")), + "hi-xy", + 0 + ); + StructuredDataBuilder.Element xzElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("z")), + "hi-xz", + 0 + ); + StructuredDataBuilder.Element yzElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("yz"), new NestedPathField("z")), + "hi-yz", + 0 + ); + Map expected = Map.of("x", Map.of("y", "hi-xy", "z", "hi-xz"), "yz", Map.of("z", "hi-yz")); + Assert.assertEquals( + StructuredData.wrap(expected), + new StructuredDataBuilder(xyElement, xzElement, yzElement).build() + ); + } + + @Test + public void testBuildMixedMultipleDepths() + { + // {"x": {"y": "hi-xy", "array": ["hi-x-array-0", null, "hi-x-array-2"]}} + StructuredDataBuilder.Element xyElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("y")), + "hi-xy", + 0 + ); + StructuredDataBuilder.Element xArray = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("array"), new NestedPathArrayElement(0)), + "hi-x-array-0", + 0 + ); + StructuredDataBuilder.Element xArray2 = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("array"), new NestedPathArrayElement(2)), + "hi-x-array-2", + 0 + ); + + Map expected = Map.of( + "x", + Map.of("y", "hi-xy", "array", Arrays.asList("hi-x-array-0", null, "hi-x-array-2")) + ); + Assert.assertEquals(StructuredData.wrap(expected), new StructuredDataBuilder(xyElement, xArray, xArray2).build()); + } + + @Test + public void testBuildExceptions() + { + StructuredDataBuilder.Element rootElement = new StructuredDataBuilder.Element( + List.of(), + "root-val", + 0 + ); + StructuredDataBuilder.Element mapElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x")), + "hi", + 0 + ); + StructuredDataBuilder.Element arrayElement = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(0)), + 1, + 0 + ); + DruidException e1 = Assert.assertThrows( + DruidException.class, + () -> new StructuredDataBuilder(rootElement, mapElement).build() + ); + Assert.assertEquals( + "Error building structured data from paths[[Element{path=[], value=root-val, depth=0}, Element{path=[NestedPathField{field='x'}], value=hi, depth=0}]], " + + "cannot have map or array elements when root value is set", + e1.getMessage() + ); + DruidException e2 = Assert.assertThrows( + DruidException.class, + () -> new StructuredDataBuilder(rootElement, arrayElement).build() + ); + Assert.assertEquals( + "Error building structured data from paths[[Element{path=[], value=root-val, depth=0}, Element{path=[NestedPathArrayElement{index=0}], value=1, depth=0}]], " + + "cannot have map or array elements when root value is set", + e2.getMessage() + ); + DruidException e3 = Assert.assertThrows( + DruidException.class, + () -> new StructuredDataBuilder(mapElement, arrayElement).build() + ); + Assert.assertEquals( + "Error building structured data from paths[[Element{path=[NestedPathField{field='x'}], value=hi, depth=0}, Element{path=[NestedPathArrayElement{index=0}], value=1, depth=0}]], " + + "cannot have both map and array elements at the same level", + e3.getMessage() + ); + } +} diff --git a/processing/src/test/resources/nested-array-test-data.json b/processing/src/test/resources/nested-array-test-data.json index 430fe165eac5..a7368aacb127 100644 --- a/processing/src/test/resources/nested-array-test-data.json +++ b/processing/src/test/resources/nested-array-test-data.json @@ -1,14 +1,14 @@ {"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b"], "arrayStringNulls": ["a", "b"], "arrayLong":[1, 2, 3], "arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2], "arrayNoType":[], "arrayNestedLong":[[1, 2, null], [3, 4]], "arrayObject":[{"x": 1},{"x":2}]} {"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b", "c"], "arrayStringNulls": [null, "b"], "arrayLong":[2, 3], "arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999, null, 5.5], "arrayVariant":[null, null, 2.2], "arrayNoType":[null], "arrayNestedLong":[null, [null], []], "arrayObject":[{"x": 3},{"x":4}]} {"timestamp": "2023-01-01T00:00:00", "arrayString": ["b", "c"], "arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4], "arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3], "arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1], "arrayNestedLong":[[1], null, [1, 2, 3]], "arrayObject":[null,{"x":2}]} -{"timestamp": "2023-01-01T00:00:00", "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayNoType":[], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}]} +{"timestamp": "2023-01-01T00:00:00", "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayNoType":[], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[null,{"x":2}]} {"timestamp": "2023-01-01T00:00:00", "arrayString": null, "arrayLong":[1, 2, 3], "arrayLongNulls":[], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":null, "arrayNoType":[], "arrayObject":[{"x": 1000},{"y":2000}]} {"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b"], "arrayStringNulls": null, "arrayLongNulls":[null, 2, 9], "arrayDouble":null, "arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2], "arrayNoType":[null, null], "arrayNestedLong":[[1], [1, 2, null]], "arrayObject":[{"a": 1},{"b":2}]} {"timestamp": "2023-01-01T00:00:00", "arrayStringNulls": ["a", "b"], "arrayLong":null, "arrayLongNulls":[2, 3], "arrayDoubleNulls":[null], "arrayVariant":null, "arrayNoType":[], "arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b"], "arrayStringNulls": [], "arrayLong":[1, 2, 3], "arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2], "arrayNoType":[], "arrayNestedLong":[[2, 3], [1, 5]], "arrayObject":[{"x": 1},{"x":2}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b", "c"], "arrayStringNulls": [null, "b"], "arrayLong":[2, 3], "arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999, null, 5.5], "arrayVariant":[null, null, 2.2], "arrayNoType":[], "arrayNestedLong":[null], "arrayObject":[{"x": 3},{"x":4}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": ["b", "c"], "arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4], "arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3], "arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1], "arrayNoType":[null], "arrayNestedLong":[[1], null, [1]], "arrayObject":[null,{"x":2}]} -{"timestamp": "2023-01-02T00:00:00", "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[null], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayNoType":[], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}]} +{"timestamp": "2023-01-02T00:00:00", "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[null], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayNoType":[], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[null,{"x":2}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": null, "arrayLong":[1, 2, 3], "arrayLongNulls":null, "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[], "arrayNoType":[], "arrayObject":[{"x": 1000},{"y":2000}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b"], "arrayStringNulls": [null], "arrayLongNulls":[null, 2, 9], "arrayDouble":null, "arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2], "arrayNoType":null, "arrayNestedLong":[], "arrayObject":[{"a": 1},{"b":2}]} {"timestamp": "2023-01-02T00:00:00", "arrayStringNulls": ["a", "b"], "arrayLong":null, "arrayLongNulls":[2, 3], "arrayDoubleNulls":[null, 1.1], "arrayVariant":null, "arrayNoType":[], "arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}]} \ No newline at end of file diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index e43bc08c1119..bb33448f0aa7 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -60,6 +60,7 @@ import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndex; @@ -97,13 +98,14 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest public static final String DATA_SOURCE_ARRAYS = "arrays"; public static final String DATA_SOURCE_ALL = "all_auto"; public static final String DATA_SOURCE_ALL_REALTIME = "all_auto_realtime"; + public static final String DATA_SOURCE_ALL_RAW_JSON = "all_auto_raw_json"; public static final List> RAW_ROWS = ImmutableList.of( ImmutableMap.builder() .put("t", "2000-01-01") .put("string", "aaa") .put("string_sparse", "zzz") - .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "300", "mixed", 1L, "mixed2", "1")) + .put("nest", ImmutableMap.of("mixed", 1L, "mixed2", "1", "x", 100L, "y", 2.02, "z", "300")) .put( "nester", ImmutableMap.of("array", ImmutableList.of("a", "b"), "n", ImmutableMap.of("x", "hello")) @@ -120,7 +122,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .put("t", "2000-01-01") .put("string", "ccc") .put("string_sparse", "10") - .put("nest", ImmutableMap.of("x", 200L, "y", 3.03, "z", "abcdef", "mixed", 1.1, "mixed2", 1L)) + .put("nest", ImmutableMap.of("mixed", 1.1, "mixed2", 1L, "x", 200L, "y", 3.03, "z", "abcdef")) .put("long", 3L) .build(), ImmutableMap.builder() @@ -138,7 +140,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ImmutableMap.builder() .put("t", "2000-01-02") .put("string", "aaa") - .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "400", "mixed2", 1.1)) + .put("nest", ImmutableMap.of("mixed2", 1.1, "x", 100L, "y", 2.02, "z", "400")) .put("nester", ImmutableMap.of("array", ImmutableList.of("a", "b"), "n", ImmutableMap.of("x", 1L))) .put("long", 5L) .build(), @@ -347,6 +349,30 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .inputTmpDir(tempDirProducer.newTempFolder()) .buildIncrementalIndex(); + final QueryableIndex indexAllTypesAutoRawJson = + IndexBuilder.create(ColumnConfig.READ_RAW_JSON) + .tmpDir(tempDirProducer.newTempFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec()) + .withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec()) + .withMetrics( + new CountAggregatorFactory("cnt") + ) + .withRollup(false) + .build() + ) + .inputSource( + ResourceInputSource.of( + NestedDataTestUtils.class.getClassLoader(), + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE + ) + ) + .inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT) + .inputTmpDir(tempDirProducer.newTempFolder()) + .buildMMappedIndex(); + walker.add( DataSegment.builder() .dataSource(DATA_SOURCE) @@ -419,6 +445,15 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .size(0) .build(), indexAllTypesAutoRealtime + ).add( + DataSegment.builder() + .dataSource(DATA_SOURCE_ALL_RAW_JSON) + .version("1") + .interval(indexAllTypesAuto.getDataInterval()) + .shardSpec(new LinearShardSpec(1)) + .size(0) + .build(), + indexAllTypesAutoRawJson ); return walker; @@ -582,7 +617,12 @@ public void testGroupByOnNestedColumn() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) @@ -611,7 +651,12 @@ public void testGroupByOnNestedColumnWithOrderBy() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) @@ -1163,7 +1208,12 @@ public void testJsonValueArrays() new NestedFieldVirtualColumn("arrayNestedLong", "$[0]", "v3", ColumnType.LONG_ARRAY) ) .columns("v0", "v1", "v2", "v3") - .columnTypes(ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.LONG_ARRAY) + .columnTypes( + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.LONG_ARRAY + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ) @@ -1669,23 +1719,23 @@ public void testGroupByRootSingleTypeArrayLongNullsUnnest() .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) .expectedQuery( GroupByQuery.builder() - .setDataSource( - UnnestDataSource.create( - TableDataSource.create(DATA_SOURCE_ARRAYS), - expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), - null - ) - ) - .setInterval(querySegmentSpec(Filtration.eternity())) - .setGranularity(Granularities.ALL) - .setDimensions( - dimensions( - new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG) - ) - ) - .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) - .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) - .build() + .setDataSource( + UnnestDataSource.create( + TableDataSource.create(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), + null + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() ) .expectedResults( ImmutableList.of( @@ -2753,7 +2803,7 @@ public void testJsonAndArrayAgg() ImmutableList.of( new Object[]{ "aaa", - "[{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"},{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}]", + "[{\"mixed\":1,\"mixed2\":\"1\",\"x\":100,\"y\":2.02,\"z\":\"300\"},{\"mixed2\":1.1,\"x\":100,\"y\":2.02,\"z\":\"400\"}]", 2L }, new Object[]{ @@ -2763,7 +2813,7 @@ public void testJsonAndArrayAgg() }, new Object[]{ "ccc", - "[{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}]", + "[{\"mixed\":1.1,\"mixed2\":1,\"x\":200,\"y\":3.03,\"z\":\"abcdef\"}]", 1L }, new Object[]{ @@ -4646,8 +4696,8 @@ public void testGroupByRootKeys2() ), ImmutableList.of( new Object[]{null, 4L}, - new Object[]{"[\"x\",\"y\",\"z\",\"mixed\",\"mixed2\"]", 2L}, - new Object[]{"[\"x\",\"y\",\"z\",\"mixed2\"]", 1L} + new Object[]{"[\"mixed\",\"mixed2\",\"x\",\"y\",\"z\"]", 2L}, + new Object[]{"[\"mixed2\",\"x\",\"y\",\"z\"]", 1L} ), RowSignature.builder() .add("EXPR$0", ColumnType.STRING_ARRAY) @@ -4912,9 +4962,7 @@ public void testJsonMerging() "nest", "v1", ColumnType.STRING, - ImmutableList.of( - new NestedPathField("x") - ), + ImmutableList.of(new NestedPathField("x")), false, null, false @@ -5020,7 +5068,12 @@ public void testToJsonAndParseJson() ) ) .columns("string", "v0", "v1", "v2") - .columnTypes(ColumnType.STRING, ColumnType.NESTED_DATA, ColumnType.NESTED_DATA, ColumnType.NESTED_DATA) + .columnTypes( + ColumnType.STRING, + ColumnType.NESTED_DATA, + ColumnType.NESTED_DATA, + ColumnType.NESTED_DATA + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ), @@ -5938,56 +5991,65 @@ public void testGroupByAndFilterVariant() } @Test - public void testScanAllTypesAuto() + public void testScanAllTypesAutoRawJson() { // Variant types are not supported by MSQ. msqIncompatible(); skipVectorize(); testQuery( - "SELECT * FROM druid.all_auto", + "SELECT * FROM druid.all_auto_raw_json", ImmutableList.of( Druids.newScanQueryBuilder() - .dataSource(DATA_SOURCE_ALL) + .dataSource(DATA_SOURCE_ALL_RAW_JSON) .intervals(querySegmentSpec(Filtration.eternity())) .columns( - "__time", - "str", - "long", - "double", - "bool", - "variant", - "variantNumeric", - "variantEmptyObj", - "variantEmtpyArray", - "variantWithArrays", - "obj", - "complexObj", - "arrayString", - "arrayStringNulls", - "arrayLong", - "arrayLongNulls", - "arrayDouble", - "arrayDoubleNulls", - "arrayVariant", - "arrayBool", - "arrayNestedLong", - "arrayObject", - "null", - "cstr", - "clong", - "cdouble", - "cObj", - "cstringArray", - "cLongArray", - "cDoubleArray", - "cEmptyArray", - "cEmptyObj", - "cNullArray", - "cEmptyObjectArray", - "cObjectArray", + "__time", "str", "long", "double", "bool", + "variant", "variantNumeric", "variantEmptyObj", "variantEmtpyArray", "variantWithArrays", + "obj", "complexObj", "arrayString", "arrayStringNulls", "arrayLong", + "arrayLongNulls", "arrayDouble", "arrayDoubleNulls", "arrayVariant", "arrayBool", + "arrayNestedLong", "arrayObject", "null", "cstr", "clong", + "cdouble", "cObj", "cstringArray", "cLongArray", "cDoubleArray", + "cEmptyArray", "cEmptyObj", "cNullArray", "cEmptyObjectArray", "cObjectArray", "cnt" ) - .columnTypes(ColumnType.LONG, ColumnType.STRING, ColumnType.LONG, ColumnType.DOUBLE, ColumnType.LONG, ColumnType.STRING, ColumnType.DOUBLE, ColumnType.ofComplex("json"), ColumnType.LONG_ARRAY, ColumnType.STRING_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.STRING_ARRAY, ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.STRING, ColumnType.STRING, ColumnType.LONG, ColumnType.DOUBLE, ColumnType.ofComplex("json"), ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.LONG) + .columnTypes( + ColumnType.LONG, + ColumnType.STRING, + ColumnType.LONG, + ColumnType.DOUBLE, + ColumnType.LONG, + ColumnType.STRING, + ColumnType.DOUBLE, + ColumnType.ofComplex("json"), + ColumnType.LONG_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.STRING_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.STRING, + ColumnType.STRING, + ColumnType.LONG, + ColumnType.DOUBLE, + ColumnType.ofComplex("json"), + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.LONG + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ), @@ -6300,6 +6362,378 @@ public void testScanAllTypesAuto() ); } + @Test + public void testScanAllTypesAuto() + { + // Variant types are not supported by MSQ. + msqIncompatible(); + skipVectorize(); + testQuery( + "SELECT * FROM druid.all_auto", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE_ALL) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns( + "__time", "str", "long", "double", "bool", + "variant", "variantNumeric", "variantEmptyObj", "variantEmtpyArray", "variantWithArrays", + "obj", "complexObj", "arrayString", "arrayStringNulls", "arrayLong", + "arrayLongNulls", "arrayDouble", "arrayDoubleNulls", "arrayVariant", "arrayBool", + "arrayNestedLong", "arrayObject", "null", "cstr", "clong", + "cdouble", "cObj", "cstringArray", "cLongArray", "cDoubleArray", + "cEmptyArray", "cEmptyObj", "cNullArray", "cEmptyObjectArray", "cObjectArray", + "cnt" + ) + .columnTypes( + ColumnType.LONG, + ColumnType.STRING, + ColumnType.LONG, + ColumnType.DOUBLE, + ColumnType.LONG, + ColumnType.STRING, + ColumnType.DOUBLE, + ColumnType.ofComplex("json"), + ColumnType.LONG_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.STRING_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.STRING, + ColumnType.STRING, + ColumnType.LONG, + ColumnType.DOUBLE, + ColumnType.ofComplex("json"), + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.LONG + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .build() + ), + ImmutableList.of( + new Object[]{ + 1672531200000L, + null, + null, + null, + 1L, + "51", + -0.13D, + "1", + "[]", + "[51,-35]", + "{\"a\":700,\"b\":{\"x\":\"g\",\"y\":1.1,\"z\":[9,null,9,9]},\"v\":[]}", + "{\"x\":400,\"y\":[{\"l\":[null],\"m\":100,\"n\":5},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}]}", + null, + "[\"a\",\"b\"]", + null, + "[2,3]", + null, + "[null]", + null, + "[1,0,1]", + null, + "[{\"x\":1},{\"x\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "{}", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "", + 2L, + null, + 0L, + "b", + 1.1D, + "\"b\"", + "2", + "b", + "{\"a\":200,\"b\":{\"x\":\"b\",\"y\":1.1,\"z\":[2,4,6]},\"c\":[\"a\",\"b\"],\"v\":[]}", + "{\"x\":10,\"y\":[{\"l\":[\"b\",\"b\",\"c\"],\"m\":\"b\",\"n\":2},[1,2,3]],\"z\":{\"a\":[5.5],\"b\":0}}", + "[\"a\",\"b\",\"c\"]", + "[null,\"b\"]", + "[2,3]", + null, + "[3.3,4.4,5.5]", + "[999.0,null,5.5]", + "[null,null,2.2]", + "[1,1]", + "[null,[null],[]]", + "[{\"x\":3},{\"x\":4}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "{}", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "a", + 1L, + 1.0D, + 1L, + "1", + 1.0D, + "1", + "1", + "1", + "{\"a\":100,\"b\":{\"x\":\"a\",\"y\":1.1,\"z\":[1,2,3,4]},\"c\":[100],\"v\":[]}", + "{\"x\":1234,\"y\":[{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{\"a\":[1.1,2.2,3.3],\"b\":1}}", + "[\"a\",\"b\"]", + "[\"a\",\"b\"]", + "[1,2,3]", + "[1,null,3]", + "[1.1,2.2,3.3]", + "[1.1,2.2,null]", + "[\"a\",\"1\",\"2.2\"]", + "[1,0,1]", + "[[1,2,null],[3,4]]", + "[{\"x\":1},{\"x\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "{}", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "b", + 4L, + 3.3D, + 1L, + "1", + null, + "{}", + "4", + "1", + "{\"a\":400,\"b\":{\"x\":\"d\",\"y\":1.1,\"z\":[3,4]},\"c\":{\"a\":1},\"v\":[]}", + "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":1}}", + "[\"d\",\"e\"]", + "[\"b\",\"b\"]", + "[1,4]", + "[1]", + "[2.2,3.3,4.0]", + null, + "[\"a\",\"b\",\"c\"]", + "[null,0,1]", + "[[1,2],[3,4],[5,6,7]]", + "[null,{\"x\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "{}", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "c", + null, + 4.4D, + 1L, + "hello", + -1000.0D, + "{}", + "[]", + "hello", + "{\"a\":500,\"b\":{\"x\":\"e\",\"z\":[1,2,3,4]},\"c\":\"hello\",\"v\":\"a\"}", + "{\"x\":11,\"y\":[],\"z\":{\"a\":[null],\"b\":0}}", + null, + null, + "[1,2,3]", + "[]", + "[1.1,2.2,3.3]", + null, + null, + "[0]", + null, + "[{\"x\":1000},{\"y\":2000}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "{}", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "d", + 5L, + 5.9D, + 0L, + null, + 3.33D, + "\"a\"", + "6", + null, + "{\"a\":600,\"b\":{\"x\":\"f\",\"y\":1.1,\"z\":[6,7,8,9]},\"c\":12.3,\"v\":\"b\"}", + null, + "[\"a\",\"b\"]", + null, + null, + "[null,2,9]", + null, + "[999.0,5.5,null]", + "[\"a\",\"1\",\"2.2\"]", + "[]", + "[[1],[1,2,null]]", + "[{\"a\":1},{\"b\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "{}", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + }, + new Object[]{ + 1672531200000L, + "null", + 3L, + 2.0D, + null, + "3.0", + 1.0D, + "3.3", + "3", + "3.0", + "{\"a\":300}", + "{\"x\":4.4,\"y\":[{\"l\":[],\"m\":100,\"n\":3},{\"l\":[\"a\"]},{\"l\":[\"b\"],\"n\":[]}],\"z\":{\"a\":[],\"b\":1}}", + "[\"b\",\"c\"]", + "[\"d\",null,\"b\"]", + "[1,2,3,4]", + "[1,2,3]", + "[1.1,3.3]", + "[null,2.2,null]", + "[1,null,1]", + "[1,null,1]", + "[[1],null,[1,2,3]]", + "[null,{\"x\":2}]", + null, + "hello", + 1234L, + 1.234D, + "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", + "[\"a\",\"b\",\"c\"]", + "[1,2,3]", + "[1.1,2.2,3.3]", + "[]", + "{}", + "[null,null]", + "{}", + "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", + 1L + } + ), + RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("str", ColumnType.STRING) + .add("long", ColumnType.LONG) + .add("double", ColumnType.DOUBLE) + .add("bool", ColumnType.LONG) + .add("variant", ColumnType.STRING) + .add("variantNumeric", ColumnType.DOUBLE) + .add("variantEmptyObj", ColumnType.NESTED_DATA) + .add("variantEmtpyArray", ColumnType.LONG_ARRAY) + .add("variantWithArrays", ColumnType.STRING_ARRAY) + .add("obj", ColumnType.NESTED_DATA) + .add("complexObj", ColumnType.NESTED_DATA) + .add("arrayString", ColumnType.STRING_ARRAY) + .add("arrayStringNulls", ColumnType.STRING_ARRAY) + .add("arrayLong", ColumnType.LONG_ARRAY) + .add("arrayLongNulls", ColumnType.LONG_ARRAY) + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) + .add("arrayVariant", ColumnType.STRING_ARRAY) + .add("arrayBool", ColumnType.LONG_ARRAY) + .add("arrayNestedLong", ColumnType.NESTED_DATA) + .add("arrayObject", ColumnType.NESTED_DATA) + .add("null", ColumnType.STRING) + .add("cstr", ColumnType.STRING) + .add("clong", ColumnType.LONG) + .add("cdouble", ColumnType.DOUBLE) + .add("cObj", ColumnType.NESTED_DATA) + .add("cstringArray", ColumnType.STRING_ARRAY) + .add("cLongArray", ColumnType.LONG_ARRAY) + .add("cDoubleArray", ColumnType.DOUBLE_ARRAY) + .add("cEmptyArray", ColumnType.LONG_ARRAY) + .add("cEmptyObj", ColumnType.NESTED_DATA) + .add("cNullArray", ColumnType.LONG_ARRAY) + .add("cEmptyObjectArray", ColumnType.NESTED_DATA) + .add("cObjectArray", ColumnType.NESTED_DATA) + .add("cnt", ColumnType.LONG) + .build() + ); + } + @Test public void testFilterJsonIsNotNull() { @@ -6317,9 +6751,9 @@ public void testFilterJsonIsNotNull() .build() ), ImmutableList.of( - new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"}"}, - new Object[]{"{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}"}, - new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}"} + new Object[]{"{\"mixed\":1,\"mixed2\":\"1\",\"x\":100,\"y\":2.02,\"z\":\"300\"}"}, + new Object[]{"{\"mixed\":1.1,\"mixed2\":1,\"x\":200,\"y\":3.03,\"z\":\"abcdef\"}"}, + new Object[]{"{\"mixed2\":1.1,\"x\":100,\"y\":2.02,\"z\":\"400\"}"} ), RowSignature.builder() .add("nest", ColumnType.NESTED_DATA) @@ -6364,25 +6798,30 @@ public void testCoalesceOnNestedColumns() testBuilder() .sql( "select c,long,coalesce(c,long) as col " - + " from druid.all_auto, unnest(json_value(arrayNestedLong, '$[1]' returning bigint array)) as u(c) " + + " from druid.all_auto, unnest(json_value(arrayNestedLong, '$[1]' returning bigint array)) as u(c) " ) .expectedQueries( ImmutableList.of( Druids.newScanQueryBuilder() - .dataSource( - UnnestDataSource.create( - new TableDataSource(DATA_SOURCE_ALL), - new NestedFieldVirtualColumn("arrayNestedLong", "$[1]", "j0.unnest", ColumnType.LONG_ARRAY), - null - ) - ) - .virtualColumns(expressionVirtualColumn("v0", "nvl(\"j0.unnest\",\"long\")", ColumnType.LONG)) - .intervals(querySegmentSpec(Filtration.eternity())) - .columns("j0.unnest", "long", "v0") - .columnTypes(ColumnType.LONG, ColumnType.LONG, ColumnType.LONG) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .context(QUERY_CONTEXT_DEFAULT) - .build() + .dataSource( + UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ALL), + new NestedFieldVirtualColumn( + "arrayNestedLong", + "$[1]", + "j0.unnest", + ColumnType.LONG_ARRAY + ), + null + ) + ) + .virtualColumns(expressionVirtualColumn("v0", "nvl(\"j0.unnest\",\"long\")", ColumnType.LONG)) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("j0.unnest", "long", "v0") + .columnTypes(ColumnType.LONG, ColumnType.LONG, ColumnType.LONG) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() ) ) .expectedResults( @@ -6399,10 +6838,10 @@ public void testCoalesceOnNestedColumns() ) .expectedSignature( RowSignature.builder() - .add("c", ColumnType.LONG) - .add("long", ColumnType.LONG) - .add("col", ColumnType.LONG) - .build() + .add("c", ColumnType.LONG) + .add("long", ColumnType.LONG) + .add("col", ColumnType.LONG) + .build() ) .run(); } @@ -6562,14 +7001,14 @@ public void testJsonQueryArrays() ImmutableList.of( new Object[]{"[{\"x\":1000},{\"y\":2000}]"}, new Object[]{"[{\"x\":1},{\"x\":2}]"}, - new Object[]{"[{\"x\":null},{\"x\":2}]"}, + new Object[]{"[null,{\"x\":2}]"}, new Object[]{"[{\"a\":1},{\"b\":2}]"}, new Object[]{"[{\"x\":1},{\"x\":2}]"}, new Object[]{"[null,{\"x\":2}]"}, new Object[]{"[{\"x\":3},{\"x\":4}]"}, new Object[]{"[{\"x\":1000},{\"y\":2000}]"}, new Object[]{"[{\"x\":1},{\"x\":2}]"}, - new Object[]{"[{\"x\":null},{\"x\":2}]"}, + new Object[]{"[null,{\"x\":2}]"}, new Object[]{"[{\"a\":1},{\"b\":2}]"}, new Object[]{"[{\"x\":1},{\"x\":2}]"}, new Object[]{"[null,{\"x\":2}]"}, @@ -6658,7 +7097,7 @@ public void testUnnestJsonQueryArrays() new Object[]{"{\"y\":2000}"}, new Object[]{"{\"x\":1}"}, new Object[]{"{\"x\":2}"}, - new Object[]{"{\"x\":null}"}, + new Object[]{null}, new Object[]{"{\"x\":2}"}, new Object[]{"{\"a\":1}"}, new Object[]{"{\"b\":2}"}, @@ -6672,7 +7111,7 @@ public void testUnnestJsonQueryArrays() new Object[]{"{\"y\":2000}"}, new Object[]{"{\"x\":1}"}, new Object[]{"{\"x\":2}"}, - new Object[]{"{\"x\":null}"}, + new Object[]{null}, new Object[]{"{\"x\":2}"}, new Object[]{"{\"a\":1}"}, new Object[]{"{\"b\":2}"}, From 239d58fe07f56fe983c4b826e4e5072e4fae2817 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 30 Sep 2025 17:31:52 -0700 Subject: [PATCH 02/19] default-read-raw --- .../druid/segment/column/ColumnConfig.java | 6 +- .../CompressedNestedDataComplexColumn.java | 3 +- .../calcite/CalciteNestedDataQueryTest.java | 424 +----------------- 3 files changed, 15 insertions(+), 418 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java index 99712b8c9438..2ca819a071dd 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java @@ -32,12 +32,12 @@ public interface ColumnConfig ColumnConfig DEFAULT = new ColumnConfig() {}; - ColumnConfig READ_RAW_JSON = new ColumnConfig() + ColumnConfig DERIVE_JSON = new ColumnConfig() { @Override public boolean deriveJsonColumnFromIndexes() { - return false; + return true; } }; /** @@ -85,6 +85,6 @@ default double skipValueRangeIndexScale() default boolean deriveJsonColumnFromIndexes() { - return true; + return false; } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 9ebf475da8c6..ee2e9b071ff5 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -80,6 +80,7 @@ import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.utils.CloseableUtils; +import org.apache.druid.utils.CollectionUtils; import javax.annotation.Nullable; import java.io.IOException; @@ -159,7 +160,7 @@ public CompressedNestedDataComplexColumn( this.columnName = columnName; this.logicalType = logicalType; this.nullValues = nullValues; - this.fieldPathMap = new LinkedHashMap<>(fields.size()); + this.fieldPathMap = CollectionUtils.newLinkedHashMapWithExpectedSize(fields.size()); for (int i = 0; i < fields.size(); i++) { String field = fields.get(i); fieldPathMap.put(parsePath(field), Pair.of(fields.get(i), i)); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index bb33448f0aa7..61baee927adf 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -98,7 +98,8 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest public static final String DATA_SOURCE_ARRAYS = "arrays"; public static final String DATA_SOURCE_ALL = "all_auto"; public static final String DATA_SOURCE_ALL_REALTIME = "all_auto_realtime"; - public static final String DATA_SOURCE_ALL_RAW_JSON = "all_auto_raw_json"; + + public static final ColumnConfig DEFAULT_DERIVE_JSON_CONFIG = ColumnConfig.DERIVE_JSON; public static final List> RAW_ROWS = ImmutableList.of( ImmutableMap.builder() @@ -197,7 +198,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu { BuiltInTypesModule.registerHandlersAndSerde(); final QueryableIndex index = - IndexBuilder.create() + IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -213,7 +214,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexMix11 = - IndexBuilder.create() + IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -230,7 +231,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu final QueryableIndex indexMix12 = - IndexBuilder.create() + IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -246,7 +247,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexMix21 = - IndexBuilder.create() + IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -262,7 +263,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexMix22 = - IndexBuilder.create() + IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -278,7 +279,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexArrays = - IndexBuilder.create() + IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -302,7 +303,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexAllTypesAuto = - IndexBuilder.create() + IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -326,7 +327,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final IncrementalIndex indexAllTypesAutoRealtime = - IndexBuilder.create() + IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -349,30 +350,6 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .inputTmpDir(tempDirProducer.newTempFolder()) .buildIncrementalIndex(); - final QueryableIndex indexAllTypesAutoRawJson = - IndexBuilder.create(ColumnConfig.READ_RAW_JSON) - .tmpDir(tempDirProducer.newTempFolder()) - .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) - .schema( - new IncrementalIndexSchema.Builder() - .withTimestampSpec(NestedDataTestUtils.AUTO_SCHEMA.getTimestampSpec()) - .withDimensionsSpec(NestedDataTestUtils.AUTO_SCHEMA.getDimensionsSpec()) - .withMetrics( - new CountAggregatorFactory("cnt") - ) - .withRollup(false) - .build() - ) - .inputSource( - ResourceInputSource.of( - NestedDataTestUtils.class.getClassLoader(), - NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE - ) - ) - .inputFormat(TestDataBuilder.DEFAULT_JSON_INPUT_FORMAT) - .inputTmpDir(tempDirProducer.newTempFolder()) - .buildMMappedIndex(); - walker.add( DataSegment.builder() .dataSource(DATA_SOURCE) @@ -445,15 +422,6 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .size(0) .build(), indexAllTypesAutoRealtime - ).add( - DataSegment.builder() - .dataSource(DATA_SOURCE_ALL_RAW_JSON) - .version("1") - .interval(indexAllTypesAuto.getDataInterval()) - .shardSpec(new LinearShardSpec(1)) - .size(0) - .build(), - indexAllTypesAutoRawJson ); return walker; @@ -5990,378 +5958,6 @@ public void testGroupByAndFilterVariant() ); } - @Test - public void testScanAllTypesAutoRawJson() - { - // Variant types are not supported by MSQ. - msqIncompatible(); - skipVectorize(); - testQuery( - "SELECT * FROM druid.all_auto_raw_json", - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource(DATA_SOURCE_ALL_RAW_JSON) - .intervals(querySegmentSpec(Filtration.eternity())) - .columns( - "__time", "str", "long", "double", "bool", - "variant", "variantNumeric", "variantEmptyObj", "variantEmtpyArray", "variantWithArrays", - "obj", "complexObj", "arrayString", "arrayStringNulls", "arrayLong", - "arrayLongNulls", "arrayDouble", "arrayDoubleNulls", "arrayVariant", "arrayBool", - "arrayNestedLong", "arrayObject", "null", "cstr", "clong", - "cdouble", "cObj", "cstringArray", "cLongArray", "cDoubleArray", - "cEmptyArray", "cEmptyObj", "cNullArray", "cEmptyObjectArray", "cObjectArray", - "cnt" - ) - .columnTypes( - ColumnType.LONG, - ColumnType.STRING, - ColumnType.LONG, - ColumnType.DOUBLE, - ColumnType.LONG, - ColumnType.STRING, - ColumnType.DOUBLE, - ColumnType.ofComplex("json"), - ColumnType.LONG_ARRAY, - ColumnType.STRING_ARRAY, - ColumnType.ofComplex("json"), - ColumnType.ofComplex("json"), - ColumnType.STRING_ARRAY, - ColumnType.STRING_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.DOUBLE_ARRAY, - ColumnType.DOUBLE_ARRAY, - ColumnType.STRING_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.ofComplex("json"), - ColumnType.ofComplex("json"), - ColumnType.STRING, - ColumnType.STRING, - ColumnType.LONG, - ColumnType.DOUBLE, - ColumnType.ofComplex("json"), - ColumnType.STRING_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.DOUBLE_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.ofComplex("json"), - ColumnType.LONG_ARRAY, - ColumnType.ofComplex("json"), - ColumnType.ofComplex("json"), - ColumnType.LONG - ) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .build() - ), - ImmutableList.of( - new Object[]{ - 1672531200000L, - null, - null, - null, - 1L, - "51", - -0.13D, - "1", - "[]", - "[51,-35]", - "{\"a\":700,\"b\":{\"x\":\"g\",\"y\":1.1,\"z\":[9,null,9,9]},\"c\":null,\"v\":[]}", - "{\"x\":400,\"y\":[{\"l\":[null],\"m\":100,\"n\":5},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{}}", - null, - "[\"a\",\"b\"]", - null, - "[2,3]", - null, - "[null]", - null, - "[1,0,1]", - null, - "[{\"x\":1},{\"x\":2}]", - null, - "hello", - 1234L, - 1.234D, - "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", - "[\"a\",\"b\",\"c\"]", - "[1,2,3]", - "[1.1,2.2,3.3]", - "[]", - "{}", - "[null,null]", - "[{},{},{}]", - "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", - 1L - }, - new Object[]{ - 1672531200000L, - "", - 2L, - null, - 0L, - "b", - 1.1D, - "\"b\"", - "2", - "b", - "{\"a\":200,\"b\":{\"x\":\"b\",\"y\":1.1,\"z\":[2,4,6]},\"c\":[\"a\",\"b\"],\"v\":[]}", - "{\"x\":10,\"y\":[{\"l\":[\"b\",\"b\",\"c\"],\"m\":\"b\",\"n\":2},[1,2,3]],\"z\":{\"a\":[5.5],\"b\":false}}", - "[\"a\",\"b\",\"c\"]", - "[null,\"b\"]", - "[2,3]", - null, - "[3.3,4.4,5.5]", - "[999.0,null,5.5]", - "[null,null,2.2]", - "[1,1]", - "[null,[null],[]]", - "[{\"x\":3},{\"x\":4}]", - null, - "hello", - 1234L, - 1.234D, - "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", - "[\"a\",\"b\",\"c\"]", - "[1,2,3]", - "[1.1,2.2,3.3]", - "[]", - "{}", - "[null,null]", - "[{},{},{}]", - "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", - 1L - }, - new Object[]{ - 1672531200000L, - "a", - 1L, - 1.0D, - 1L, - "1", - 1.0D, - "1", - "1", - "1", - "{\"a\":100,\"b\":{\"x\":\"a\",\"y\":1.1,\"z\":[1,2,3,4]},\"c\":[100],\"v\":[]}", - "{\"x\":1234,\"y\":[{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", - "[\"a\",\"b\"]", - "[\"a\",\"b\"]", - "[1,2,3]", - "[1,null,3]", - "[1.1,2.2,3.3]", - "[1.1,2.2,null]", - "[\"a\",\"1\",\"2.2\"]", - "[1,0,1]", - "[[1,2,null],[3,4]]", - "[{\"x\":1},{\"x\":2}]", - null, - "hello", - 1234L, - 1.234D, - "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", - "[\"a\",\"b\",\"c\"]", - "[1,2,3]", - "[1.1,2.2,3.3]", - "[]", - "{}", - "[null,null]", - "[{},{},{}]", - "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", - 1L - }, - new Object[]{ - 1672531200000L, - "b", - 4L, - 3.3D, - 1L, - "1", - null, - "{}", - "4", - "1", - "{\"a\":400,\"b\":{\"x\":\"d\",\"y\":1.1,\"z\":[3,4]},\"c\":{\"a\":1},\"v\":[]}", - "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", - "[\"d\",\"e\"]", - "[\"b\",\"b\"]", - "[1,4]", - "[1]", - "[2.2,3.3,4.0]", - null, - "[\"a\",\"b\",\"c\"]", - "[null,0,1]", - "[[1,2],[3,4],[5,6,7]]", - "[{\"x\":null},{\"x\":2}]", - null, - "hello", - 1234L, - 1.234D, - "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", - "[\"a\",\"b\",\"c\"]", - "[1,2,3]", - "[1.1,2.2,3.3]", - "[]", - "{}", - "[null,null]", - "[{},{},{}]", - "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", - 1L - }, - new Object[]{ - 1672531200000L, - "c", - null, - 4.4D, - 1L, - "hello", - -1000.0D, - "{}", - "[]", - "hello", - "{\"a\":500,\"b\":{\"x\":\"e\",\"z\":[1,2,3,4]},\"c\":\"hello\",\"v\":\"a\"}", - "{\"x\":11,\"y\":[],\"z\":{\"a\":[null],\"b\":false}}", - null, - null, - "[1,2,3]", - "[]", - "[1.1,2.2,3.3]", - null, - null, - "[0]", - null, - "[{\"x\":1000},{\"y\":2000}]", - null, - "hello", - 1234L, - 1.234D, - "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", - "[\"a\",\"b\",\"c\"]", - "[1,2,3]", - "[1.1,2.2,3.3]", - "[]", - "{}", - "[null,null]", - "[{},{},{}]", - "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", - 1L - }, - new Object[]{ - 1672531200000L, - "d", - 5L, - 5.9D, - 0L, - null, - 3.33D, - "\"a\"", - "6", - null, - "{\"a\":600,\"b\":{\"x\":\"f\",\"y\":1.1,\"z\":[6,7,8,9]},\"c\":12.3,\"v\":\"b\"}", - null, - "[\"a\",\"b\"]", - null, - null, - "[null,2,9]", - null, - "[999.0,5.5,null]", - "[\"a\",\"1\",\"2.2\"]", - "[]", - "[[1],[1,2,null]]", - "[{\"a\":1},{\"b\":2}]", - null, - "hello", - 1234L, - 1.234D, - "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", - "[\"a\",\"b\",\"c\"]", - "[1,2,3]", - "[1.1,2.2,3.3]", - "[]", - "{}", - "[null,null]", - "[{},{},{}]", - "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", - 1L - }, - new Object[]{ - 1672531200000L, - "null", - 3L, - 2.0D, - null, - "3.0", - 1.0D, - "3.3", - "3", - "3.0", - "{\"a\":300}", - "{\"x\":4.4,\"y\":[{\"l\":[],\"m\":100,\"n\":3},{\"l\":[\"a\"]},{\"l\":[\"b\"],\"n\":[]}],\"z\":{\"a\":[],\"b\":true}}", - "[\"b\",\"c\"]", - "[\"d\",null,\"b\"]", - "[1,2,3,4]", - "[1,2,3]", - "[1.1,3.3]", - "[null,2.2,null]", - "[1,null,1]", - "[1,null,1]", - "[[1],null,[1,2,3]]", - "[null,{\"x\":2}]", - null, - "hello", - 1234L, - 1.234D, - "{\"x\":1,\"y\":\"hello\",\"z\":{\"a\":1.1,\"b\":1234,\"c\":[\"a\",\"b\",\"c\"],\"d\":[]}}", - "[\"a\",\"b\",\"c\"]", - "[1,2,3]", - "[1.1,2.2,3.3]", - "[]", - "{}", - "[null,null]", - "[{},{},{}]", - "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", - 1L - } - ), - RowSignature.builder() - .add("__time", ColumnType.LONG) - .add("str", ColumnType.STRING) - .add("long", ColumnType.LONG) - .add("double", ColumnType.DOUBLE) - .add("bool", ColumnType.LONG) - .add("variant", ColumnType.STRING) - .add("variantNumeric", ColumnType.DOUBLE) - .add("variantEmptyObj", ColumnType.NESTED_DATA) - .add("variantEmtpyArray", ColumnType.LONG_ARRAY) - .add("variantWithArrays", ColumnType.STRING_ARRAY) - .add("obj", ColumnType.NESTED_DATA) - .add("complexObj", ColumnType.NESTED_DATA) - .add("arrayString", ColumnType.STRING_ARRAY) - .add("arrayStringNulls", ColumnType.STRING_ARRAY) - .add("arrayLong", ColumnType.LONG_ARRAY) - .add("arrayLongNulls", ColumnType.LONG_ARRAY) - .add("arrayDouble", ColumnType.DOUBLE_ARRAY) - .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) - .add("arrayVariant", ColumnType.STRING_ARRAY) - .add("arrayBool", ColumnType.LONG_ARRAY) - .add("arrayNestedLong", ColumnType.NESTED_DATA) - .add("arrayObject", ColumnType.NESTED_DATA) - .add("null", ColumnType.STRING) - .add("cstr", ColumnType.STRING) - .add("clong", ColumnType.LONG) - .add("cdouble", ColumnType.DOUBLE) - .add("cObj", ColumnType.NESTED_DATA) - .add("cstringArray", ColumnType.STRING_ARRAY) - .add("cLongArray", ColumnType.LONG_ARRAY) - .add("cDoubleArray", ColumnType.DOUBLE_ARRAY) - .add("cEmptyArray", ColumnType.LONG_ARRAY) - .add("cEmptyObj", ColumnType.NESTED_DATA) - .add("cNullArray", ColumnType.LONG_ARRAY) - .add("cEmptyObjectArray", ColumnType.NESTED_DATA) - .add("cObjectArray", ColumnType.NESTED_DATA) - .add("cnt", ColumnType.LONG) - .build() - ); - } - @Test public void testScanAllTypesAuto() { From f8d84b8a27a4d72679713be941fd49ab15c9ce80 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 30 Sep 2025 20:15:58 -0700 Subject: [PATCH 03/19] object-encoding --- ...ssedVariableSizedBlobColumnSerializer.java | 77 ++++++++---- ...ressedVariableSizedBlobColumnSupplier.java | 26 +++- .../CompressedNestedDataComplexColumn.java | 111 ++++++++++++++---- .../nested/NestedDataColumnSerializer.java | 1 + .../segment/nested/ObjectStorageEncoding.java | 1 + .../CompressedComplexColumnSerializer.java | 1 + .../CompressedVariableSizeBlobColumnTest.java | 53 +++++++++ 7 files changed, 216 insertions(+), 54 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSerializer.java index 6693daa4326a..fcc661a4bff7 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSerializer.java @@ -19,9 +19,11 @@ package org.apache.druid.segment.data; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; import org.apache.druid.segment.CompressedPools; +import org.apache.druid.segment.nested.ObjectStorageEncoding; import org.apache.druid.segment.serde.MetaSerdeHelper; import org.apache.druid.segment.serde.Serializer; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; @@ -40,6 +42,7 @@ public class CompressedVariableSizedBlobColumnSerializer implements Serializer private final String offsetsFile; private final String blobsFile; private final SegmentWriteOutMedium segmentWriteOutMedium; + private final ObjectStorageEncoding objectStorageEncoding; private final CompressionStrategy compression; private int numValues; @@ -51,6 +54,7 @@ public class CompressedVariableSizedBlobColumnSerializer implements Serializer public CompressedVariableSizedBlobColumnSerializer( final String filenameBase, final SegmentWriteOutMedium segmentWriteOutMedium, + final ObjectStorageEncoding objectStorageEncoding, final CompressionStrategy compression ) { @@ -58,6 +62,7 @@ public CompressedVariableSizedBlobColumnSerializer( this.offsetsFile = getCompressedOffsetsFileName(filenameBase); this.blobsFile = getCompressedBlobsFileName(filenameBase); this.segmentWriteOutMedium = segmentWriteOutMedium; + this.objectStorageEncoding = objectStorageEncoding; this.compression = compression; this.numValues = 0; } @@ -66,28 +71,40 @@ public void open() throws IOException { numValues = 0; currentOffset = 0; - offsetsSerializer = new CompressedLongsSerializer( - segmentWriteOutMedium, - compression, - segmentWriteOutMedium.getCloser() - ); - offsetsSerializer.open(); - - valuesSerializer = new CompressedBlockSerializer( - segmentWriteOutMedium, - compression, - CompressedPools.BUFFER_SIZE, - segmentWriteOutMedium.getCloser() - ); - valuesSerializer.open(); + if (ObjectStorageEncoding.SMILE.equals(objectStorageEncoding)) { + offsetsSerializer = new CompressedLongsSerializer( + segmentWriteOutMedium, + compression, + segmentWriteOutMedium.getCloser() + ); + offsetsSerializer.open(); + + valuesSerializer = new CompressedBlockSerializer( + segmentWriteOutMedium, + compression, + CompressedPools.BUFFER_SIZE, + segmentWriteOutMedium.getCloser() + ); + valuesSerializer.open(); + } else if (ObjectStorageEncoding.NONE.equals(objectStorageEncoding)) { + // skip skip serialize + } else { + throw DruidException.defensive("unreachable"); + } + } public void addValue(byte[] bytes) throws IOException { - valuesSerializer.addValue(bytes); - - currentOffset += bytes.length; - offsetsSerializer.add(currentOffset); + if (ObjectStorageEncoding.SMILE.equals(objectStorageEncoding)) { + valuesSerializer.addValue(bytes); + currentOffset += bytes.length; + offsetsSerializer.add(currentOffset); + } else if (ObjectStorageEncoding.NONE.equals(objectStorageEncoding)) { + // skip serialize + } else { + throw DruidException.defensive("unreachable"); + } numValues++; if (numValues < 0) { throw new ColumnCapacityExceededException(filenameBase); @@ -96,9 +113,15 @@ public void addValue(byte[] bytes) throws IOException public void addValue(ByteBuffer bytes) throws IOException { - currentOffset += bytes.remaining(); - valuesSerializer.addValue(bytes); - offsetsSerializer.add(currentOffset); + if (ObjectStorageEncoding.SMILE.equals(objectStorageEncoding)) { + currentOffset += bytes.remaining(); + valuesSerializer.addValue(bytes); + offsetsSerializer.add(currentOffset); + } else if (ObjectStorageEncoding.NONE.equals(objectStorageEncoding)) { + // skip serialize + } else { + throw DruidException.defensive("unreachable"); + } numValues++; if (numValues < 0) { throw new ColumnCapacityExceededException(filenameBase); @@ -116,11 +139,13 @@ public long getSerializedSize() public void writeTo(WritableByteChannel channel, FileSmoosher smoosher) throws IOException { META_SERDE_HELPER.writeTo(channel, this); - try (SmooshedWriter sub = smoosher.addWithSmooshedWriter(offsetsFile, offsetsSerializer.getSerializedSize())) { - offsetsSerializer.writeTo(sub, smoosher); - } - try (SmooshedWriter sub = smoosher.addWithSmooshedWriter(blobsFile, valuesSerializer.getSerializedSize())) { - valuesSerializer.writeTo(sub, smoosher); + if (ObjectStorageEncoding.SMILE.equals(objectStorageEncoding)) { + try (SmooshedWriter sub = smoosher.addWithSmooshedWriter(offsetsFile, offsetsSerializer.getSerializedSize())) { + offsetsSerializer.writeTo(sub, smoosher); + } + try (SmooshedWriter sub = smoosher.addWithSmooshedWriter(blobsFile, valuesSerializer.getSerializedSize())) { + valuesSerializer.writeTo(sub, smoosher); + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java index 17ef19f7f41f..be34a8fe6be3 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java @@ -22,6 +22,7 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; +import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -79,8 +80,8 @@ public static CompressedVariableSizedBlobColumnSupplier fromByteBuffer( private final Supplier blockDataReaderSupplier; private CompressedVariableSizedBlobColumnSupplier( - ByteBuffer offsetsBuffer, - ByteBuffer dataBuffer, + @Nullable ByteBuffer offsetsBuffer, + @Nullable ByteBuffer dataBuffer, ByteOrder compressionOrder, ByteOrder valueOrder, int numElements, @@ -88,17 +89,34 @@ private CompressedVariableSizedBlobColumnSupplier( ) { this.numElements = numElements; - this.offsetReaderSupplier = CompressedLongsReader.fromByteBuffer(offsetsBuffer, compressionOrder); - this.blockDataReaderSupplier = CompressedBlockReader.fromByteBuffer(dataBuffer, compressionOrder, valueOrder, copyValuesOnRead); + this.offsetReaderSupplier = offsetsBuffer == null + ? null + : () -> CompressedLongsReader.fromByteBuffer(offsetsBuffer, compressionOrder).get(); + this.blockDataReaderSupplier = dataBuffer == null + ? null + : () -> CompressedBlockReader.fromByteBuffer( + dataBuffer, + compressionOrder, + valueOrder, + copyValuesOnRead + ).get(); } @Override public CompressedVariableSizedBlobColumn get() { + if (offsetReaderSupplier == null || blockDataReaderSupplier == null) { + return null; + } return new CompressedVariableSizedBlobColumn( numElements, offsetReaderSupplier.get(), blockDataReaderSupplier.get() ); } + + int getNumElements() + { + return numElements; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 337207f8e232..2505f3eacd01 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -42,7 +42,6 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.NilColumnValueSelector; import org.apache.druid.segment.ObjectColumnSelector; -import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BaseColumnHolder; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; @@ -96,6 +95,7 @@ import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.stream.Collectors; @@ -330,12 +330,25 @@ public Object getRowValue(int rowNum) return null; } - if (compressedRawColumn == null) { + if (compressedRawColumn == null && !columnConfig.deriveJsonColumnFromIndexes()) { compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } - final ByteBuffer valueBuffer = compressedRawColumn.get(rowNum); - return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + if (compressedRawColumn != null) { + final ByteBuffer valueBuffer = compressedRawColumn.get(rowNum); + return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + } + + ReadableOffset offset = new AtomicIntegerReadableOffset(new AtomicInteger(rowNum)); + final List elements = + fieldPathMap.keySet().stream() + .map(path -> StructuredDataBuilder.Element.of( + path, + (Objects.requireNonNull(getColumnHolder(path)).getColumn()).makeColumnValueSelector(offset) + .getObject() + )) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); } @Override @@ -350,17 +363,21 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) offset ); } - final List, BaseColumn>> fieldColumns; - if (columnConfig.deriveJsonColumnFromIndexes()) { - fieldColumns = fieldPathMap.keySet().stream() - .map(path -> Pair.of(path, Objects.requireNonNull(getColumnHolder(path)).getColumn())) - .collect(Collectors.toList()); - } else { - fieldColumns = null; - if (compressedRawColumn == null) { - compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); - } + if (compressedRawColumn == null && !columnConfig.deriveJsonColumnFromIndexes()) { + compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } + final List, ColumnValueSelector>> fieldSelectors = + compressedRawColumn != null + ? null + : fieldPathMap.keySet() + .stream() + .map(path -> Pair.of( + path, + ((DictionaryEncodedColumn) Objects.requireNonNull(getColumnHolder(path)) + .getColumn()).makeColumnValueSelector(offset) + )) + .collect(Collectors.toList()); + return new ObjectColumnSelector() { @@ -371,16 +388,17 @@ public Object getObject() if (nullValues.get(offset.getOffset())) { return null; } - if (columnConfig.deriveJsonColumnFromIndexes()) { - List elements = fieldColumns - .stream() - .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.makeColumnValueSelector(offset).getObject())) - .collect(Collectors.toList()); - return new StructuredDataBuilder(elements).build(); - } else { + if (compressedRawColumn != null) { final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); } + List elements = + Objects.requireNonNull(fieldSelectors) + .stream() + .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); + } @Override @@ -409,9 +427,23 @@ public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset offset ); } - if (compressedRawColumn == null) { + + if (compressedRawColumn == null && !columnConfig.deriveJsonColumnFromIndexes()) { compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } + AtomicInteger rowNumber = new AtomicInteger(-1); + AtomicIntegerReadableOffset atomicOffset = new AtomicIntegerReadableOffset(rowNumber); + final List, ColumnValueSelector>> fieldSelectors = + compressedRawColumn != null ? null : + fieldPathMap.keySet() + .stream() + .map(path -> Pair.of( + path, + ((DictionaryEncodedColumn) Objects.requireNonNull(getColumnHolder(path)) + .getColumn()).makeColumnValueSelector(atomicOffset) + )) + .collect(Collectors.toList()); + return new VectorObjectSelector() { final Object[] vector = new Object[offset.getMaxVectorSize()]; @@ -453,8 +485,17 @@ private Object getForOffset(int offset) // maybe someday can use bitmap batch operations for nulls? return null; } - final ByteBuffer valueBuffer = compressedRawColumn.get(offset); - return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + if (compressedRawColumn != null) { + final ByteBuffer valueBuffer = compressedRawColumn.get(offset); + return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + } else { + rowNumber.set(offset); + List elements = fieldSelectors + .stream() + .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); + } } @Override @@ -1192,4 +1233,26 @@ public int compare(Object o1, Object o2) return Integer.compare(((Number) o1).intValue(), ((Number) o2).intValue()); } } + + private static class AtomicIntegerReadableOffset implements ReadableOffset + { + private final AtomicInteger offset; + + AtomicIntegerReadableOffset(AtomicInteger offset) + { + this.offset = offset; + } + + @Override + public int getOffset() + { + return offset.get(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java index 157750b3c7b2..d29ff3367e70 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java @@ -246,6 +246,7 @@ public void open() throws IOException rawWriter = new CompressedVariableSizedBlobColumnSerializer( ColumnSerializerUtils.getInternalFileName(name, RAW_FILE_NAME), segmentWriteOutMedium, + columnFormatSpec.getObjectStorageEncoding(), columnFormatSpec.getObjectStorageCompression() ); rawWriter.open(); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ObjectStorageEncoding.java b/processing/src/main/java/org/apache/druid/segment/nested/ObjectStorageEncoding.java index 3b1c823dbe44..2f21cbba64b9 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ObjectStorageEncoding.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ObjectStorageEncoding.java @@ -25,6 +25,7 @@ public enum ObjectStorageEncoding { + NONE, SMILE; @JsonValue diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java index 321cf8bc8b08..af7b2fff9c1a 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java @@ -85,6 +85,7 @@ public void open() throws IOException writer = new CompressedVariableSizedBlobColumnSerializer( ColumnSerializerUtils.getInternalFileName(name, FILE_NAME), segmentWriteOutMedium, + indexSpec.getAutoColumnFormatSpec().getObjectStorageEncoding(), indexSpec.getComplexMetricCompression() ); writer.open(); diff --git a/processing/src/test/java/org/apache/druid/segment/data/CompressedVariableSizeBlobColumnTest.java b/processing/src/test/java/org/apache/druid/segment/data/CompressedVariableSizeBlobColumnTest.java index 070440d203e1..37a48b86f401 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/CompressedVariableSizeBlobColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/CompressedVariableSizeBlobColumnTest.java @@ -23,6 +23,7 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; import org.apache.druid.segment.CompressedPools; +import org.apache.druid.segment.nested.ObjectStorageEncoding; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.junit.Assert; @@ -44,6 +45,55 @@ public class CompressedVariableSizeBlobColumnTest @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); + @Test + public void testNoneEncoding() throws IOException + { + // value sizes increase until they span at least 3 pages of compressed buffers + final File tmpFile = tempFolder.newFolder(); + final FileSmoosher smoosher = new FileSmoosher(tmpFile); + + final File tmpFile2 = tempFolder.newFolder(); + final SegmentWriteOutMedium writeOutMedium = + TmpFileSegmentWriteOutMediumFactory.instance().makeSegmentWriteOutMedium(tmpFile2); + + final String fileNameBase = "test"; + + final CompressionStrategy compressionStrategy = CompressionStrategy.LZ4; + CompressedVariableSizedBlobColumnSerializer serializer = new CompressedVariableSizedBlobColumnSerializer( + fileNameBase, + writeOutMedium, + ObjectStorageEncoding.NONE, + compressionStrategy + ); + serializer.open(); + + int numWritten = 0; + final Random r = ThreadLocalRandom.current(); + for (int i = 0; i < r.nextInt(10); i++) { + byte[] value = new byte[r.nextInt(5)]; + serializer.addValue(value); + numWritten++; + } + + SmooshedWriter writer = smoosher.addWithSmooshedWriter(fileNameBase, serializer.getSerializedSize()); + serializer.writeTo(writer, smoosher); + writer.close(); + smoosher.close(); + SmooshedFileMapper fileMapper = SmooshedFileMapper.load(tmpFile); + + ByteBuffer base = fileMapper.mapFile(fileNameBase); + CompressedVariableSizedBlobColumnSupplier supplier = CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( + fileNameBase, + base, + ByteOrder.nativeOrder(), + ByteOrder.nativeOrder(), + fileMapper + ); + + Assert.assertEquals(numWritten, supplier.getNumElements()); + Assert.assertNull(supplier.get()); + } + @Test public void testSomeValues() throws IOException { @@ -61,6 +111,7 @@ public void testSomeValues() throws IOException CompressedVariableSizedBlobColumnSerializer serializer = new CompressedVariableSizedBlobColumnSerializer( fileNameBase, writeOutMedium, + ObjectStorageEncoding.SMILE, compressionStrategy ); serializer.open(); @@ -125,6 +176,7 @@ public void testSomeValuesByteBuffers() throws IOException CompressedVariableSizedBlobColumnSerializer serializer = new CompressedVariableSizedBlobColumnSerializer( fileNameBase, writeOutMedium, + ObjectStorageEncoding.SMILE, compressionStrategy ); serializer.open(); @@ -188,6 +240,7 @@ public void testSomeValuesByteBuffersBigEndian() throws IOException CompressedVariableSizedBlobColumnSerializer serializer = new CompressedVariableSizedBlobColumnSerializer( fileNameBase, writeOutMedium, + ObjectStorageEncoding.SMILE, compressionStrategy ); serializer.open(); From 591049264c6a905d10a6a6a4b6ab999aeb504a77 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 30 Sep 2025 21:10:08 -0700 Subject: [PATCH 04/19] default --- .../segment/nested/NestedCommonFormatColumnFormatSpec.java | 2 +- .../segment/serde/CompressedComplexColumnSerializer.java | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java index 592e759b72ed..343012e94c14 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java @@ -40,7 +40,7 @@ */ public class NestedCommonFormatColumnFormatSpec { - private static final NestedCommonFormatColumnFormatSpec DEFAULT = + public static final NestedCommonFormatColumnFormatSpec DEFAULT = NestedCommonFormatColumnFormatSpec.builder() .setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY) .setObjectStorageEncoding(ObjectStorageEncoding.SMILE) diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java index af7b2fff9c1a..1ffae8f3aee1 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.GenericColumnSerializer; @@ -30,6 +31,7 @@ import org.apache.druid.segment.data.ByteBufferWriter; import org.apache.druid.segment.data.CompressedVariableSizedBlobColumnSerializer; import org.apache.druid.segment.data.ObjectStrategy; +import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; import java.io.ByteArrayOutputStream; @@ -85,8 +87,8 @@ public void open() throws IOException writer = new CompressedVariableSizedBlobColumnSerializer( ColumnSerializerUtils.getInternalFileName(name, FILE_NAME), segmentWriteOutMedium, - indexSpec.getAutoColumnFormatSpec().getObjectStorageEncoding(), - indexSpec.getComplexMetricCompression() + GuavaUtils.firstNonNull(indexSpec.getAutoColumnFormatSpec(), NestedCommonFormatColumnFormatSpec.DEFAULT) + .getObjectStorageEncoding(), indexSpec.getComplexMetricCompression() ); writer.open(); From 1f093dc8d1636473fd40f7d776d6f569d728c9e3 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 30 Sep 2025 21:22:25 -0700 Subject: [PATCH 05/19] buffer --- .../data/CompressedVariableSizedBlobColumnSupplier.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java index be34a8fe6be3..bf4aa9238695 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java @@ -91,15 +91,15 @@ private CompressedVariableSizedBlobColumnSupplier( this.numElements = numElements; this.offsetReaderSupplier = offsetsBuffer == null ? null - : () -> CompressedLongsReader.fromByteBuffer(offsetsBuffer, compressionOrder).get(); + : CompressedLongsReader.fromByteBuffer(offsetsBuffer, compressionOrder); this.blockDataReaderSupplier = dataBuffer == null ? null - : () -> CompressedBlockReader.fromByteBuffer( + : CompressedBlockReader.fromByteBuffer( dataBuffer, compressionOrder, valueOrder, copyValuesOnRead - ).get(); + ); } @Override From 5ff95ee481766423dbc549ad6fdb82c8f003754b Mon Sep 17 00:00:00 2001 From: cecemei Date: Wed, 1 Oct 2025 17:49:03 -0700 Subject: [PATCH 06/19] format --- .../data/CompressedVariableSizedBlobColumnSupplier.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java index bf4aa9238695..10b199716f43 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java @@ -76,8 +76,8 @@ public static CompressedVariableSizedBlobColumnSupplier fromByteBuffer( private final int numElements; - private final Supplier offsetReaderSupplier; - private final Supplier blockDataReaderSupplier; + @Nullable private final Supplier offsetReaderSupplier; + @Nullable private final Supplier blockDataReaderSupplier; private CompressedVariableSizedBlobColumnSupplier( @Nullable ByteBuffer offsetsBuffer, From e044d0e214499d1cf56a04d0d8852c49a067d115 Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 2 Oct 2025 13:17:58 -0700 Subject: [PATCH 07/19] lazy-supplier --- .../CompressedNestedDataComplexColumn.java | 602 ++++++++++-------- .../nested/NestedDataColumnSupplier.java | 39 +- .../segment/nested/NestedDataColumnV5.java | 3 +- .../NestedCommonFormatColumnPartSerde.java | 2 +- .../nested/NestedDataColumnSupplierTest.java | 2 +- 5 files changed, 366 insertions(+), 282 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 2505f3eacd01..ba307b5ae63a 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -45,7 +45,6 @@ import org.apache.druid.segment.column.BaseColumnHolder; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; -import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DictionaryEncodedColumn; @@ -79,15 +78,14 @@ import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.utils.CloseableUtils; -import org.apache.druid.utils.CollectionUtils; import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -121,9 +119,10 @@ public abstract class CompressedNestedDataComplexColumn, Pair> fieldPathMap; + private final Supplier fieldsSupplier; private final FieldTypeInfo fieldInfo; private final Supplier stringDictionarySupplier; private final Supplier> longDictionarySupplier; @@ -138,12 +137,13 @@ public abstract class CompressedNestedDataComplexColumn columns = new ConcurrentHashMap<>(); private CompressedVariableSizedBlobColumn compressedRawColumn; + private ArrayField arrayField; public CompressedNestedDataComplexColumn( String columnName, ColumnType logicalType, @SuppressWarnings("unused") ColumnConfig columnConfig, - CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + @Nullable CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, ImmutableBitmap nullValues, Supplier fieldsSupplier, FieldTypeInfo fieldInfo, @@ -160,12 +160,7 @@ public CompressedNestedDataComplexColumn( this.columnName = columnName; this.logicalType = logicalType; this.nullValues = nullValues; - final TKeyDictionary fields = fieldsSupplier.get(); - this.fieldPathMap = CollectionUtils.newLinkedHashMapWithExpectedSize(fields.size()); - for (int i = 0; i < fields.size(); i++) { - String field = StringUtils.fromUtf8(fields.get(i)); - fieldPathMap.put(parsePath(field), Pair.of(field, i)); - } + this.fieldsSupplier = fieldsSupplier; this.fieldInfo = fieldInfo; this.stringDictionarySupplier = stringDictionary; this.longDictionarySupplier = longDictionarySupplier; @@ -190,9 +185,9 @@ public CompressedNestedDataComplexColumn( public SortedMap getFieldTypeInfo() { SortedMap fieldMap = new TreeMap<>(); - for (Pair field : fieldPathMap.values()) { - FieldTypeInfo.TypeSet types = fieldInfo.getTypes(field.rhs); - fieldMap.put(field.lhs, new FieldTypeInfo.MutableTypeSet(types.getByteValue())); + for (BaseField field : getAllFields()) { + FieldTypeInfo.TypeSet types = fieldInfo.getTypes(field.fieldIndex); + fieldMap.put(field.fieldName, new FieldTypeInfo.MutableTypeSet(types.getByteValue())); } return fieldMap; } @@ -206,7 +201,7 @@ public ColumnType getLogicalType() @Override public List> getNestedFields() { - return ImmutableList.copyOf(fieldPathMap.keySet()); + return ImmutableList.copyOf(getAllParsedFields().stream().map(pair -> pair.rhs).collect(Collectors.toList())); } public TStringDictionary getUtf8BytesDictionary() @@ -330,7 +325,7 @@ public Object getRowValue(int rowNum) return null; } - if (compressedRawColumn == null && !columnConfig.deriveJsonColumnFromIndexes()) { + if (compressedRawColumn == null && compressedRawColumnSupplier != null) { compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } @@ -341,42 +336,45 @@ public Object getRowValue(int rowNum) ReadableOffset offset = new AtomicIntegerReadableOffset(new AtomicInteger(rowNum)); final List elements = - fieldPathMap.keySet().stream() - .map(path -> StructuredDataBuilder.Element.of( - path, - (Objects.requireNonNull(getColumnHolder(path)).getColumn()).makeColumnValueSelector(offset) - .getObject() - )) - .collect(Collectors.toList()); + getAllParsedFields().stream() + .map(pair -> StructuredDataBuilder.Element.of( + pair.rhs, + getColumnHolder(pair.lhs.fieldName, pair.lhs.fieldIndex).getColumn() + .makeColumnValueSelector(offset) + .getObject() + )) + .collect(Collectors.toList()); return new StructuredDataBuilder(elements).build(); } @Override public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) { + List allFields = getAllFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) - && fieldPathMap.size() == 1 - && rootFieldPath.equals(Iterables.getOnlyElement(fieldPathMap.values()).lhs)) { + && allFields.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(allFields).fieldName)) { return makeColumnValueSelector( ImmutableList.of(), null /* not used */, offset ); } - if (compressedRawColumn == null && !columnConfig.deriveJsonColumnFromIndexes()) { + if (compressedRawColumn == null && compressedRawColumnSupplier != null) { compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } final List, ColumnValueSelector>> fieldSelectors = - compressedRawColumn != null + compressedRawColumnSupplier != null ? null - : fieldPathMap.keySet() - .stream() - .map(path -> Pair.of( - path, - ((DictionaryEncodedColumn) Objects.requireNonNull(getColumnHolder(path)) - .getColumn()).makeColumnValueSelector(offset) - )) - .collect(Collectors.toList()); + : getAllParsedFields().stream() + .map(pair -> Pair.of( + pair.rhs, + ((DictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn()).makeColumnValueSelector(offset) + )) + .collect(Collectors.toList()); return new ObjectColumnSelector() @@ -418,9 +416,10 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) { + List>> allFields = getAllParsedFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) - && fieldPathMap.size() == 1 - && rootFieldPath.equals(Iterables.getOnlyElement(fieldPathMap.values()).lhs)) { + && allFields.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(allFields).lhs.fieldName)) { return makeVectorObjectSelector( Collections.emptyList(), null /* not used */, @@ -428,21 +427,22 @@ public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset ); } - if (compressedRawColumn == null && !columnConfig.deriveJsonColumnFromIndexes()) { + if (compressedRawColumn == null && compressedRawColumnSupplier != null) { compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } AtomicInteger rowNumber = new AtomicInteger(-1); AtomicIntegerReadableOffset atomicOffset = new AtomicIntegerReadableOffset(rowNumber); final List, ColumnValueSelector>> fieldSelectors = - compressedRawColumn != null ? null : - fieldPathMap.keySet() - .stream() - .map(path -> Pair.of( - path, - ((DictionaryEncodedColumn) Objects.requireNonNull(getColumnHolder(path)) - .getColumn()).makeColumnValueSelector(atomicOffset) - )) - .collect(Collectors.toList()); + compressedRawColumnSupplier != null ? null : + allFields.stream() + .map(pair -> Pair.of( + pair.rhs, + ((DictionaryEncodedColumn) Objects.requireNonNull(getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + )).getColumn()).makeColumnValueSelector(atomicOffset) + )) + .collect(Collectors.toList()); return new VectorObjectSelector() { @@ -515,9 +515,10 @@ public int getMaxVectorSize() @Override public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) { + List allFields = getAllFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) - && fieldPathMap.size() == 1 - && rootFieldPath.equals(Iterables.getOnlyElement(fieldPathMap.values()).lhs)) { + && allFields.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(allFields).fieldName)) { return makeVectorValueSelector( Collections.emptyList(), null /* not used */, @@ -530,10 +531,10 @@ public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) @Override public int getLength() { - if (compressedRawColumn == null) { + if (compressedRawColumn == null && compressedRawColumnSupplier != null) { compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } - return compressedRawColumn.size(); + return compressedRawColumnSupplier != null ? compressedRawColumn.size() : -1; } @Override @@ -542,6 +543,7 @@ public void close() CloseableUtils.closeAndWrapExceptions(closer); } + /** * Create a selector for a nested path. * @@ -557,53 +559,54 @@ public DimensionSelector makeDimensionSelector( ReadableOffset readableOffset ) { - final Pair field = fieldPathMap.get(path); - if (field != null) { - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(field.lhs, field.rhs).getColumn(); + final Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + ((BaseField) field).fieldName, + ((BaseField) field).fieldIndex + ).getColumn(); return col.makeDimensionSelector(readableOffset, extractionFn); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final Pair arrayField = fieldPathMap.get(path.subList(0, path.size() - 1)); - if (arrayField != null) { - final int elementNumber = ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex(); - if (elementNumber < 0) { - throw new IAE( - "Cannot make array element selector for path [%s], negative array index not supported for this selector", - path - ); - } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField.lhs, - arrayField.rhs - ).getColumn(); - ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); - return new BaseSingleValueDimensionSelector() + } else if (field instanceof ArrayField) { + final ArrayField arrayField = (ArrayField) field; + final int elementNumber = arrayField.elementNumber; + if (elementNumber < 0) { + throw new IAE( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); + } + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + arrayField.baseField.fieldName, + arrayField.baseField.fieldIndex + ).getColumn(); + ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); + return new BaseSingleValueDimensionSelector() + { + @Nullable + @Override + protected String getValue() { - @Nullable - @Override - protected String getValue() - { - Object o = arraySelector.getObject(); - if (o instanceof Object[]) { - Object[] array = (Object[]) o; - if (elementNumber < array.length) { - Object element = array[elementNumber]; - if (element == null) { - return null; - } - return String.valueOf(element); + Object o = arraySelector.getObject(); + if (o instanceof Object[]) { + Object[] array = (Object[]) o; + if (elementNumber < array.length) { + Object element = array[elementNumber]; + if (element == null) { + return null; } + return String.valueOf(element); } - return null; } + return null; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + arraySelector.inspectRuntimeShape(inspector); + } + }; - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - arraySelector.inspectRuntimeShape(inspector); - } - }; - } } return DimensionSelector.constant(null); } @@ -622,81 +625,84 @@ public ColumnValueSelector makeColumnValueSelector( ReadableOffset readableOffset ) { - BaseColumnHolder columnHolder = getColumnHolder(path); - if (columnHolder != null) { - return columnHolder.getColumn().makeColumnValueSelector(readableOffset); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final BaseColumnHolder arrayColumnHolder = getColumnHolder(path.subList(0, path.size() - 1)); - if (arrayColumnHolder != null) { - final int elementNumber = ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex(); - if (elementNumber < 0) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "Cannot make array element selector for path [%s], negative array index not supported for this selector", - path - ); - } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) arrayColumnHolder.getColumn(); - ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); - return new ColumnValueSelector<>() + Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + final BaseField baseField = (BaseField) field; + return getColumnHolder(baseField.fieldName, baseField.fieldIndex).getColumn() + .makeColumnValueSelector(readableOffset); + } else if (field instanceof ArrayField) { + final ArrayField arrayField = (ArrayField) field; + final int elementNumber = arrayField.elementNumber; + if (elementNumber < 0) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); + } + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + arrayField.baseField.fieldName, + arrayField.baseField.fieldIndex + ).getColumn(); + ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); + return new ColumnValueSelector<>() + { + @Override + public boolean isNull() { - @Override - public boolean isNull() - { - Object o = getObject(); - return !(o instanceof Number); - } + Object o = getObject(); + return !(o instanceof Number); + } - @Override - public long getLong() - { - Object o = getObject(); - return o instanceof Number ? ((Number) o).longValue() : 0L; - } + @Override + public long getLong() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).longValue() : 0L; + } - @Override - public float getFloat() - { - Object o = getObject(); - return o instanceof Number ? ((Number) o).floatValue() : 0f; - } + @Override + public float getFloat() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).floatValue() : 0f; + } - @Override - public double getDouble() - { - Object o = getObject(); - return o instanceof Number ? ((Number) o).doubleValue() : 0.0; - } + @Override + public double getDouble() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).doubleValue() : 0.0; + } - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - arraySelector.inspectRuntimeShape(inspector); - } + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + arraySelector.inspectRuntimeShape(inspector); + } - @Nullable - @Override - public Object getObject() - { - Object o = arraySelector.getObject(); - if (o instanceof Object[]) { - Object[] array = (Object[]) o; - if (elementNumber < array.length) { - return array[elementNumber]; - } + @Nullable + @Override + public Object getObject() + { + Object o = arraySelector.getObject(); + if (o instanceof Object[]) { + Object[] array = (Object[]) o; + if (elementNumber < array.length) { + return array[elementNumber]; } - return null; } + return null; + } + + @Override + public Class classOfObject() + { + return Object.class; + } + }; - @Override - public Class classOfObject() - { - return Object.class; - } - }; - } } return NilColumnValueSelector.instance(); } @@ -708,9 +714,13 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector ReadableVectorOffset readableOffset ) { - final ColumnHolder columnHolder = getColumnHolder(path); - if (columnHolder != null) { - DictionaryEncodedColumn col = (DictionaryEncodedColumn) columnHolder.getColumn(); + final Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + BaseField baseField = (BaseField) field; + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + baseField.fieldName, + baseField.fieldIndex + ).getColumn(); return col.makeSingleValueDimensionVectorSelector(readableOffset); } else { return NilVectorSelector.create(readableOffset); @@ -731,67 +741,69 @@ public VectorObjectSelector makeVectorObjectSelector( ReadableVectorOffset readableOffset ) { - final BaseColumnHolder columnHolder = getColumnHolder(path); - if (columnHolder != null) { - return columnHolder.getColumn().makeVectorObjectSelector(readableOffset); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final BaseColumnHolder arrayColumnHolder = getColumnHolder(path.subList(0, path.size() - 1)); - if (arrayColumnHolder != null) { - final int elementNumber = ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex(); - if (elementNumber < 0) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "Cannot make array element selector for path [%s], negative array index not supported for this selector", - path - ); - } - VectorObjectSelector arraySelector = arrayColumnHolder.getColumn().makeVectorObjectSelector(readableOffset); + final Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + BaseField baseField = (BaseField) field; + return getColumnHolder(baseField.fieldName, baseField.fieldIndex).getColumn() + .makeVectorObjectSelector(readableOffset); + } else if (field instanceof ArrayField) { + final ArrayField arrayField = (ArrayField) field; + final int elementNumber = arrayField.elementNumber; + if (elementNumber < 0) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); + } + VectorObjectSelector arraySelector = getColumnHolder( + arrayField.baseField.fieldName, + arrayField.baseField.fieldIndex + ).getColumn().makeVectorObjectSelector(readableOffset); + return new VectorObjectSelector() + { + private final Object[] elements = new Object[arraySelector.getMaxVectorSize()]; + private int id = ReadableVectorInspector.NULL_ID; - return new VectorObjectSelector() + @Override + public Object[] getObjectVector() { - private final Object[] elements = new Object[arraySelector.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - @Override - public Object[] getObjectVector() - { - if (readableOffset.getId() != id) { - final Object[] delegate = arraySelector.getObjectVector(); - for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { - Object maybeArray = delegate[i]; - if (maybeArray instanceof Object[]) { - Object[] anArray = (Object[]) maybeArray; - if (elementNumber < anArray.length) { - final Object element = anArray[elementNumber]; - elements[i] = element; - } else { - elements[i] = null; - } + if (readableOffset.getId() != id) { + final Object[] delegate = arraySelector.getObjectVector(); + for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { + Object maybeArray = delegate[i]; + if (maybeArray instanceof Object[]) { + Object[] anArray = (Object[]) maybeArray; + if (elementNumber < anArray.length) { + final Object element = anArray[elementNumber]; + elements[i] = element; } else { elements[i] = null; } + } else { + elements[i] = null; } - id = readableOffset.getId(); } - return elements; + id = readableOffset.getId(); } + return elements; + } - @Override - public int getMaxVectorSize() - { - return arraySelector.getMaxVectorSize(); - } + @Override + public int getMaxVectorSize() + { + return arraySelector.getMaxVectorSize(); + } - @Override - public int getCurrentVectorSize() - { - return arraySelector.getCurrentVectorSize(); - } - }; - } + @Override + public int getCurrentVectorSize() + { + return arraySelector.getCurrentVectorSize(); + } + }; } + return NilVectorSelector.create(readableOffset); } @@ -942,24 +954,22 @@ public int getCurrentVectorSize() @Override public Set getFieldTypes(List path) { - final Pair field = fieldPathMap.get(path); - if (field != null) { - return FieldTypeInfo.convertToSet(fieldInfo.getTypes(field.rhs).getByteValue()); - } else if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final Pair arrayField = fieldPathMap.get(path.subList(0, path.size() - 1)); - if (arrayField != null) { - final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.rhs) - .getByteValue()); - final Set elementTypes = Sets.newHashSetWithExpectedSize(arrayFieldTypes.size()); - for (ColumnType type : arrayFieldTypes) { - if (type.isArray()) { - elementTypes.add((ColumnType) type.getElementType()); - } else { - elementTypes.add(type); - } + final Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + return FieldTypeInfo.convertToSet(fieldInfo.getTypes(((BaseField) field).fieldIndex).getByteValue()); + } else if (field instanceof ArrayField) { + final ArrayField arrayField = (ArrayField) field; + final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.baseField.fieldIndex) + .getByteValue()); + final Set elementTypes = Sets.newHashSetWithExpectedSize(arrayFieldTypes.size()); + for (ColumnType type : arrayFieldTypes) { + if (type.isArray()) { + elementTypes.add((ColumnType) type.getElementType()); + } else { + elementTypes.add(type); } - return elementTypes; } + return elementTypes; } return null; } @@ -968,28 +978,28 @@ public Set getFieldTypes(List path) @Override public ColumnType getFieldLogicalType(List path) { - final Pair field = fieldPathMap.get(path); - if (field != null) { - final Set fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(field.rhs).getByteValue()); + final Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + final Set fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(((BaseField) field).fieldIndex) + .getByteValue()); return ColumnType.leastRestrictiveType(fieldTypes); - } else if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final Pair arrayField = fieldPathMap.get(path.subList(0, path.size() - 1)); - if (arrayField != null) { - final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.rhs) - .getByteValue()); - ColumnType leastRestrictiveType = null; - for (ColumnType type : arrayFieldTypes) { - if (type.isArray()) { - leastRestrictiveType = ColumnType.leastRestrictiveType( - leastRestrictiveType, - (ColumnType) type.getElementType() - ); - } else { - leastRestrictiveType = ColumnType.leastRestrictiveType(leastRestrictiveType, type); - } + } else if (field instanceof ArrayField) { + final ArrayField arrayField = (ArrayField) field; + final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.baseField.fieldIndex) + .getByteValue()); + ColumnType leastRestrictiveType = null; + for (ColumnType type : arrayFieldTypes) { + if (type.isArray()) { + leastRestrictiveType = ColumnType.leastRestrictiveType( + leastRestrictiveType, + (ColumnType) type.getElementType() + ); + } else { + leastRestrictiveType = ColumnType.leastRestrictiveType(leastRestrictiveType, type); } - return leastRestrictiveType; } + return leastRestrictiveType; + } return null; @@ -999,26 +1009,24 @@ public ColumnType getFieldLogicalType(List path) @Override public BaseColumnHolder getColumnHolder(List path) { - final Pair field = fieldPathMap.get(path); - if (field == null) { - return null; + final Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + final BaseField baseField = (BaseField) field; + return getColumnHolder(baseField.fieldName, baseField.fieldIndex); } - return getColumnHolder(field.lhs, field.rhs); + return null; } @Nullable @Override public ColumnIndexSupplier getColumnIndexSupplier(List path) { - final ColumnHolder columnHolder = getColumnHolder(path); - if (columnHolder != null) { - return columnHolder.getIndexSupplier(); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final ColumnHolder arrayColumnHolder = getColumnHolder(path.subList(0, path.size() - 1)); - if (arrayColumnHolder != null) { - return NoIndexesColumnIndexSupplier.getInstance(); - } + final Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + final BaseField baseField = (BaseField) field; + return getColumnHolder(baseField.fieldName, baseField.fieldIndex).getIndexSupplier(); + } else if (field instanceof ArrayField) { + return NoIndexesColumnIndexSupplier.getInstance(); } return null; } @@ -1026,9 +1034,10 @@ public ColumnIndexSupplier getColumnIndexSupplier(List path) @Override public boolean isNumeric(List path) { - final ColumnHolder columnHolder = getColumnHolder(path); - if (columnHolder != null) { - return columnHolder.getCapabilities().isNumeric(); + final Field field = getBaseOrArrayFieldFromPath(path); + if (field instanceof BaseField) { + final BaseField baseField = (BaseField) field; + return getColumnHolder(baseField.fieldName, baseField.fieldIndex).getCapabilities().isNumeric(); } return true; } @@ -1234,6 +1243,75 @@ public int compare(Object o1, Object o2) } } + private List getAllFields() + { + TKeyDictionary fields = fieldsSupplier.get(); + List allFields = new ArrayList<>(fields.size()); + for (int i = 0; i < fields.size(); i++) { + String field = StringUtils.fromUtf8(fields.get(i)); + allFields.add(new BaseField(field, i)); + } + return allFields; + } + + private List>> getAllParsedFields() + { + TKeyDictionary fields = fieldsSupplier.get(); + List>> allFields = new ArrayList<>(fields.size()); + for (int i = 0; i < fields.size(); i++) { + String field = StringUtils.fromUtf8(fields.get(i)); + allFields.add(Pair.of(new BaseField(field, i), parsePath(field))); + } + return allFields; + } + + @Nullable + private Field getBaseOrArrayFieldFromPath(List path) + { + TKeyDictionary fields = fieldsSupplier.get(); + List arrayPath = (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) + ? path.subList(0, path.size() - 1) + : null; + for (int i = 0; i < fields.size(); i++) { + String field = StringUtils.fromUtf8(fields.get(i)); + List parsed = parsePath(field); + if (parsed.equals(path)) { + return new BaseField(field, i); + } else if (parsed.equals(arrayPath)) { + return new ArrayField(new BaseField(field, i), ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex()); + } + } + return null; + } + + interface Field + { + } + + private static class BaseField implements Field + { + private final String fieldName; + private final int fieldIndex; + + BaseField(String fieldName, int fieldIndex) + { + this.fieldName = fieldName; + this.fieldIndex = fieldIndex; + } + } + + private static class ArrayField implements Field + { + private final BaseField baseField; + private final int elementNumber; + + ArrayField(BaseField baseField, int elementNumber) + { + this.baseField = baseField; + this.elementNumber = elementNumber; + } + } + private static class AtomicIntegerReadableOffset implements ReadableOffset { private final AtomicInteger offset; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java index 204f8d2b3ba8..630cf7db2e05 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java @@ -53,7 +53,7 @@ public static NestedDataColumnSupplier read( ByteBuffer bb, ColumnBuilder columnBuilder, ColumnConfig columnConfig, - BitmapSerdeFactory bitmapSerdeFactory, + NestedCommonFormatColumnFormatSpec nestedCommonFormatColumnFormatSpec, ByteOrder byteOrder, NestedDataColumnSupplier parent ) @@ -75,7 +75,6 @@ public static NestedDataColumnSupplier read( final Supplier arrayDictionarySupplier; - if (parent != null) { fieldsSupplier = parent.fieldSupplier; fieldInfo = parent.fieldInfo; @@ -136,16 +135,18 @@ public static NestedDataColumnSupplier read( columnName, NestedCommonFormatColumnSerializer.RAW_FILE_NAME ); - compressedRawColumnSupplier = CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( - ColumnSerializerUtils.getInternalFileName( - columnName, - NestedCommonFormatColumnSerializer.RAW_FILE_NAME - ), - rawBuffer, - byteOrder, - byteOrder, // byte order doesn't matter since serde is byte blobs - mapper - ); + compressedRawColumnSupplier = ObjectStorageEncoding.NONE.equals(nestedCommonFormatColumnFormatSpec.getObjectStorageEncoding()) + ? null + : CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( + ColumnSerializerUtils.getInternalFileName( + columnName, + NestedCommonFormatColumnSerializer.RAW_FILE_NAME + ), + rawBuffer, + byteOrder, + byteOrder, // byte order doesn't matter since serde is byte blobs + mapper + ); if (hasNulls) { columnBuilder.setHasNulls(true); final ByteBuffer nullIndexBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile( @@ -153,9 +154,13 @@ public static NestedDataColumnSupplier read( columnName, ColumnSerializerUtils.NULL_BITMAP_FILE_NAME ); - nullValues = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(nullIndexBuffer); + nullValues = nestedCommonFormatColumnFormatSpec.getBitmapEncoding() + .getObjectStrategy() + .fromByteBufferWithSize(nullIndexBuffer); } else { - nullValues = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); + nullValues = nestedCommonFormatColumnFormatSpec.getBitmapEncoding() + .getBitmapFactory() + .makeEmptyImmutableBitmap(); } return new NestedDataColumnSupplier( @@ -170,7 +175,7 @@ public static NestedDataColumnSupplier read( arrayDictionarySupplier, columnConfig, mapper, - bitmapSerdeFactory, + nestedCommonFormatColumnFormatSpec.getBitmapEncoding(), byteOrder, logicalType ); @@ -186,7 +191,7 @@ public static NestedDataColumnSupplier read( private final String columnName; private final Supplier> fieldSupplier; private final FieldTypeInfo fieldInfo; - private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; + @Nullable private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; private final ImmutableBitmap nullValues; private final Supplier> stringDictionarySupplier; private final Supplier> longDictionarySupplier; @@ -204,7 +209,7 @@ private NestedDataColumnSupplier( String columnName, Supplier> fieldSupplier, FieldTypeInfo fieldInfo, - CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + @Nullable CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, ImmutableBitmap nullValues, Supplier> stringDictionarySupplier, Supplier> longDictionarySupplier, diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java index bdc90b9d9b4a..2a375538422c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java @@ -31,6 +31,7 @@ import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.serde.ColumnSerializerUtils; +import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.List; @@ -51,7 +52,7 @@ public NestedDataColumnV5( String columnName, ColumnType logicalType, ColumnConfig columnConfig, - CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + @Nullable CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, ImmutableBitmap nullValues, Supplier fields, FieldTypeInfo fieldInfo, diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java index fa74a2d98c32..ee74706e7904 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java @@ -346,7 +346,7 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo buffer, builder, columnConfig, - formatSpec != null ? formatSpec.getBitmapEncoding() : bitmapSerdeFactory, + formatSpec, byteOrder, parent == null ? null : (NestedDataColumnSupplier) parent.getColumnSupplier() ); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 39109f5ed049..5f4beb266081 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -349,7 +349,7 @@ public void testConcurrency() throws ExecutionException, InterruptedException baseBuffer, bob, ColumnConfig.SELECTION_SIZE, - bitmapSerdeFactory, + columnFormatSpec, ByteOrder.nativeOrder(), null ); From 748a64d81c629599c9422032835cdb50b0194bde Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 2 Oct 2025 13:20:47 -0700 Subject: [PATCH 08/19] revert-column-config --- .../druid/segment/column/ColumnConfig.java | 13 - .../calcite/CalciteNestedDataQueryTest.java | 267 ++++++++---------- 2 files changed, 116 insertions(+), 164 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java index 2ca819a071dd..a4bdfcfd6fa4 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java @@ -32,14 +32,6 @@ public interface ColumnConfig ColumnConfig DEFAULT = new ColumnConfig() {}; - ColumnConfig DERIVE_JSON = new ColumnConfig() - { - @Override - public boolean deriveJsonColumnFromIndexes() - { - return true; - } - }; /** * Use range indexes if dictionary range is same size or smaller than selection size */ @@ -82,9 +74,4 @@ default double skipValueRangeIndexScale() { return DEFAULT_SKIP_VALUE_RANGE_INDEX_SCALE; } - - default boolean deriveJsonColumnFromIndexes() - { - return false; - } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index ddcb823861c1..97db549de27f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -60,7 +60,6 @@ import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndex; @@ -99,14 +98,12 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest public static final String DATA_SOURCE_ALL = "all_auto"; public static final String DATA_SOURCE_ALL_REALTIME = "all_auto_realtime"; - public static final ColumnConfig DEFAULT_DERIVE_JSON_CONFIG = ColumnConfig.DERIVE_JSON; - public static final List> RAW_ROWS = ImmutableList.of( ImmutableMap.builder() .put("t", "2000-01-01") .put("string", "aaa") .put("string_sparse", "zzz") - .put("nest", ImmutableMap.of("mixed", 1L, "mixed2", "1", "x", 100L, "y", 2.02, "z", "300")) + .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "300", "mixed", 1L, "mixed2", "1")) .put( "nester", ImmutableMap.of("array", ImmutableList.of("a", "b"), "n", ImmutableMap.of("x", "hello")) @@ -123,7 +120,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .put("t", "2000-01-01") .put("string", "ccc") .put("string_sparse", "10") - .put("nest", ImmutableMap.of("mixed", 1.1, "mixed2", 1L, "x", 200L, "y", 3.03, "z", "abcdef")) + .put("nest", ImmutableMap.of("x", 200L, "y", 3.03, "z", "abcdef", "mixed", 1.1, "mixed2", 1L)) .put("long", 3L) .build(), ImmutableMap.builder() @@ -141,7 +138,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ImmutableMap.builder() .put("t", "2000-01-02") .put("string", "aaa") - .put("nest", ImmutableMap.of("mixed2", 1.1, "x", 100L, "y", 2.02, "z", "400")) + .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "400", "mixed2", 1.1)) .put("nester", ImmutableMap.of("array", ImmutableList.of("a", "b"), "n", ImmutableMap.of("x", 1L))) .put("long", 5L) .build(), @@ -198,7 +195,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu { BuiltInTypesModule.registerHandlersAndSerde(); final QueryableIndex index = - IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) + IndexBuilder.create() .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -214,7 +211,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexMix11 = - IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) + IndexBuilder.create() .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -231,7 +228,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu final QueryableIndex indexMix12 = - IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) + IndexBuilder.create() .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -247,7 +244,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexMix21 = - IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) + IndexBuilder.create() .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -263,7 +260,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexMix22 = - IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) + IndexBuilder.create() .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -279,7 +276,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexArrays = - IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) + IndexBuilder.create() .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -303,7 +300,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final QueryableIndex indexAllTypesAuto = - IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) + IndexBuilder.create() .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -327,7 +324,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .buildMMappedIndex(); final IncrementalIndex indexAllTypesAutoRealtime = - IndexBuilder.create(DEFAULT_DERIVE_JSON_CONFIG) + IndexBuilder.create() .tmpDir(tempDirProducer.newTempFolder()) .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) .schema( @@ -585,12 +582,7 @@ public void testGroupByOnNestedColumn() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setVirtualColumns( - new ExpressionVirtualColumn( - "v0", - "strlen(\"string\")", - ColumnType.LONG, - queryFramework().macroTable() - ) + new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) @@ -619,12 +611,7 @@ public void testGroupByOnNestedColumnWithOrderBy() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setVirtualColumns( - new ExpressionVirtualColumn( - "v0", - "strlen(\"string\")", - ColumnType.LONG, - queryFramework().macroTable() - ) + new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) @@ -1176,12 +1163,7 @@ public void testJsonValueArrays() new NestedFieldVirtualColumn("arrayNestedLong", "$[0]", "v3", ColumnType.LONG_ARRAY) ) .columns("v0", "v1", "v2", "v3") - .columnTypes( - ColumnType.STRING_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.DOUBLE_ARRAY, - ColumnType.LONG_ARRAY - ) + .columnTypes(ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.LONG_ARRAY) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ) @@ -1687,23 +1669,23 @@ public void testGroupByRootSingleTypeArrayLongNullsUnnest() .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) .expectedQuery( GroupByQuery.builder() - .setDataSource( - UnnestDataSource.create( - TableDataSource.create(DATA_SOURCE_ARRAYS), - expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), - null - ) - ) - .setInterval(querySegmentSpec(Filtration.eternity())) - .setGranularity(Granularities.ALL) - .setDimensions( - dimensions( - new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG) - ) - ) - .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) - .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) - .build() + .setDataSource( + UnnestDataSource.create( + TableDataSource.create(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), + null + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() ) .expectedResults( ImmutableList.of( @@ -2771,7 +2753,7 @@ public void testJsonAndArrayAgg() ImmutableList.of( new Object[]{ "aaa", - "[{\"mixed\":1,\"mixed2\":\"1\",\"x\":100,\"y\":2.02,\"z\":\"300\"},{\"mixed2\":1.1,\"x\":100,\"y\":2.02,\"z\":\"400\"}]", + "[{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"},{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}]", 2L }, new Object[]{ @@ -2781,7 +2763,7 @@ public void testJsonAndArrayAgg() }, new Object[]{ "ccc", - "[{\"mixed\":1.1,\"mixed2\":1,\"x\":200,\"y\":3.03,\"z\":\"abcdef\"}]", + "[{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}]", 1L }, new Object[]{ @@ -4664,8 +4646,8 @@ public void testGroupByRootKeys2() ), ImmutableList.of( new Object[]{null, 4L}, - new Object[]{"[\"mixed\",\"mixed2\",\"x\",\"y\",\"z\"]", 2L}, - new Object[]{"[\"mixed2\",\"x\",\"y\",\"z\"]", 1L} + new Object[]{"[\"x\",\"y\",\"z\",\"mixed\",\"mixed2\"]", 2L}, + new Object[]{"[\"x\",\"y\",\"z\",\"mixed2\"]", 1L} ), RowSignature.builder() .add("EXPR$0", ColumnType.STRING_ARRAY) @@ -4930,7 +4912,9 @@ public void testJsonMerging() "nest", "v1", ColumnType.STRING, - ImmutableList.of(new NestedPathField("x")), + ImmutableList.of( + new NestedPathField("x") + ), false, null, false @@ -5036,12 +5020,7 @@ public void testToJsonAndParseJson() ) ) .columns("string", "v0", "v1", "v2") - .columnTypes( - ColumnType.STRING, - ColumnType.NESTED_DATA, - ColumnType.NESTED_DATA, - ColumnType.NESTED_DATA - ) + .columnTypes(ColumnType.STRING, ColumnType.NESTED_DATA, ColumnType.NESTED_DATA, ColumnType.NESTED_DATA) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ), @@ -5971,53 +5950,44 @@ public void testScanAllTypesAuto() .dataSource(DATA_SOURCE_ALL) .intervals(querySegmentSpec(Filtration.eternity())) .columns( - "__time", "str", "long", "double", "bool", - "variant", "variantNumeric", "variantEmptyObj", "variantEmtpyArray", "variantWithArrays", - "obj", "complexObj", "arrayString", "arrayStringNulls", "arrayLong", - "arrayLongNulls", "arrayDouble", "arrayDoubleNulls", "arrayVariant", "arrayBool", - "arrayNestedLong", "arrayObject", "null", "cstr", "clong", - "cdouble", "cObj", "cstringArray", "cLongArray", "cDoubleArray", - "cEmptyArray", "cEmptyObj", "cNullArray", "cEmptyObjectArray", "cObjectArray", + "__time", + "str", + "long", + "double", + "bool", + "variant", + "variantNumeric", + "variantEmptyObj", + "variantEmtpyArray", + "variantWithArrays", + "obj", + "complexObj", + "arrayString", + "arrayStringNulls", + "arrayLong", + "arrayLongNulls", + "arrayDouble", + "arrayDoubleNulls", + "arrayVariant", + "arrayBool", + "arrayNestedLong", + "arrayObject", + "null", + "cstr", + "clong", + "cdouble", + "cObj", + "cstringArray", + "cLongArray", + "cDoubleArray", + "cEmptyArray", + "cEmptyObj", + "cNullArray", + "cEmptyObjectArray", + "cObjectArray", "cnt" ) - .columnTypes( - ColumnType.LONG, - ColumnType.STRING, - ColumnType.LONG, - ColumnType.DOUBLE, - ColumnType.LONG, - ColumnType.STRING, - ColumnType.DOUBLE, - ColumnType.ofComplex("json"), - ColumnType.LONG_ARRAY, - ColumnType.STRING_ARRAY, - ColumnType.ofComplex("json"), - ColumnType.ofComplex("json"), - ColumnType.STRING_ARRAY, - ColumnType.STRING_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.DOUBLE_ARRAY, - ColumnType.DOUBLE_ARRAY, - ColumnType.STRING_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.ofComplex("json"), - ColumnType.ofComplex("json"), - ColumnType.STRING, - ColumnType.STRING, - ColumnType.LONG, - ColumnType.DOUBLE, - ColumnType.ofComplex("json"), - ColumnType.STRING_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.DOUBLE_ARRAY, - ColumnType.LONG_ARRAY, - ColumnType.ofComplex("json"), - ColumnType.LONG_ARRAY, - ColumnType.ofComplex("json"), - ColumnType.ofComplex("json"), - ColumnType.LONG - ) + .columnTypes(ColumnType.LONG, ColumnType.STRING, ColumnType.LONG, ColumnType.DOUBLE, ColumnType.LONG, ColumnType.STRING, ColumnType.DOUBLE, ColumnType.ofComplex("json"), ColumnType.LONG_ARRAY, ColumnType.STRING_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.STRING_ARRAY, ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.STRING, ColumnType.STRING, ColumnType.LONG, ColumnType.DOUBLE, ColumnType.ofComplex("json"), ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.LONG) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ), @@ -6033,8 +6003,8 @@ public void testScanAllTypesAuto() "1", "[]", "[51,-35]", - "{\"a\":700,\"b\":{\"x\":\"g\",\"y\":1.1,\"z\":[9,null,9,9]},\"v\":[]}", - "{\"x\":400,\"y\":[{\"l\":[null],\"m\":100,\"n\":5},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}]}", + "{\"a\":700,\"b\":{\"x\":\"g\",\"y\":1.1,\"z\":[9,null,9,9]},\"c\":null,\"v\":[]}", + "{\"x\":400,\"y\":[{\"l\":[null],\"m\":100,\"n\":5},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{}}", null, "[\"a\",\"b\"]", null, @@ -6056,7 +6026,7 @@ public void testScanAllTypesAuto() "[]", "{}", "[null,null]", - "{}", + "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L }, @@ -6072,7 +6042,7 @@ public void testScanAllTypesAuto() "2", "b", "{\"a\":200,\"b\":{\"x\":\"b\",\"y\":1.1,\"z\":[2,4,6]},\"c\":[\"a\",\"b\"],\"v\":[]}", - "{\"x\":10,\"y\":[{\"l\":[\"b\",\"b\",\"c\"],\"m\":\"b\",\"n\":2},[1,2,3]],\"z\":{\"a\":[5.5],\"b\":0}}", + "{\"x\":10,\"y\":[{\"l\":[\"b\",\"b\",\"c\"],\"m\":\"b\",\"n\":2},[1,2,3]],\"z\":{\"a\":[5.5],\"b\":false}}", "[\"a\",\"b\",\"c\"]", "[null,\"b\"]", "[2,3]", @@ -6094,7 +6064,7 @@ public void testScanAllTypesAuto() "[]", "{}", "[null,null]", - "{}", + "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L }, @@ -6110,7 +6080,7 @@ public void testScanAllTypesAuto() "1", "1", "{\"a\":100,\"b\":{\"x\":\"a\",\"y\":1.1,\"z\":[1,2,3,4]},\"c\":[100],\"v\":[]}", - "{\"x\":1234,\"y\":[{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{\"a\":[1.1,2.2,3.3],\"b\":1}}", + "{\"x\":1234,\"y\":[{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1},{\"l\":[\"a\",\"b\",\"c\"],\"m\":\"a\",\"n\":1}],\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", "[\"a\",\"b\"]", "[\"a\",\"b\"]", "[1,2,3]", @@ -6132,7 +6102,7 @@ public void testScanAllTypesAuto() "[]", "{}", "[null,null]", - "{}", + "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L }, @@ -6148,7 +6118,7 @@ public void testScanAllTypesAuto() "4", "1", "{\"a\":400,\"b\":{\"x\":\"d\",\"y\":1.1,\"z\":[3,4]},\"c\":{\"a\":1},\"v\":[]}", - "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":1}}", + "{\"x\":1234,\"z\":{\"a\":[1.1,2.2,3.3],\"b\":true}}", "[\"d\",\"e\"]", "[\"b\",\"b\"]", "[1,4]", @@ -6158,7 +6128,7 @@ public void testScanAllTypesAuto() "[\"a\",\"b\",\"c\"]", "[null,0,1]", "[[1,2],[3,4],[5,6,7]]", - "[null,{\"x\":2}]", + "[{\"x\":null},{\"x\":2}]", null, "hello", 1234L, @@ -6170,7 +6140,7 @@ public void testScanAllTypesAuto() "[]", "{}", "[null,null]", - "{}", + "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L }, @@ -6186,7 +6156,7 @@ public void testScanAllTypesAuto() "[]", "hello", "{\"a\":500,\"b\":{\"x\":\"e\",\"z\":[1,2,3,4]},\"c\":\"hello\",\"v\":\"a\"}", - "{\"x\":11,\"y\":[],\"z\":{\"a\":[null],\"b\":0}}", + "{\"x\":11,\"y\":[],\"z\":{\"a\":[null],\"b\":false}}", null, null, "[1,2,3]", @@ -6208,7 +6178,7 @@ public void testScanAllTypesAuto() "[]", "{}", "[null,null]", - "{}", + "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L }, @@ -6246,7 +6216,7 @@ public void testScanAllTypesAuto() "[]", "{}", "[null,null]", - "{}", + "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L }, @@ -6262,7 +6232,7 @@ public void testScanAllTypesAuto() "3", "3.0", "{\"a\":300}", - "{\"x\":4.4,\"y\":[{\"l\":[],\"m\":100,\"n\":3},{\"l\":[\"a\"]},{\"l\":[\"b\"],\"n\":[]}],\"z\":{\"a\":[],\"b\":1}}", + "{\"x\":4.4,\"y\":[{\"l\":[],\"m\":100,\"n\":3},{\"l\":[\"a\"]},{\"l\":[\"b\"],\"n\":[]}],\"z\":{\"a\":[],\"b\":true}}", "[\"b\",\"c\"]", "[\"d\",null,\"b\"]", "[1,2,3,4]", @@ -6284,7 +6254,7 @@ public void testScanAllTypesAuto() "[]", "{}", "[null,null]", - "{}", + "[{},{},{}]", "[{\"a\":\"b\",\"x\":1,\"y\":1.3}]", 1L } @@ -6347,9 +6317,9 @@ public void testFilterJsonIsNotNull() .build() ), ImmutableList.of( - new Object[]{"{\"mixed\":1,\"mixed2\":\"1\",\"x\":100,\"y\":2.02,\"z\":\"300\"}"}, - new Object[]{"{\"mixed\":1.1,\"mixed2\":1,\"x\":200,\"y\":3.03,\"z\":\"abcdef\"}"}, - new Object[]{"{\"mixed2\":1.1,\"x\":100,\"y\":2.02,\"z\":\"400\"}"} + new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"}"}, + new Object[]{"{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}"}, + new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}"} ), RowSignature.builder() .add("nest", ColumnType.NESTED_DATA) @@ -6394,30 +6364,25 @@ public void testCoalesceOnNestedColumns() testBuilder() .sql( "select c,long,coalesce(c,long) as col " - + " from druid.all_auto, unnest(json_value(arrayNestedLong, '$[1]' returning bigint array)) as u(c) " + + " from druid.all_auto, unnest(json_value(arrayNestedLong, '$[1]' returning bigint array)) as u(c) " ) .expectedQueries( ImmutableList.of( Druids.newScanQueryBuilder() - .dataSource( - UnnestDataSource.create( - new TableDataSource(DATA_SOURCE_ALL), - new NestedFieldVirtualColumn( - "arrayNestedLong", - "$[1]", - "j0.unnest", - ColumnType.LONG_ARRAY - ), - null - ) - ) - .virtualColumns(expressionVirtualColumn("v0", "nvl(\"j0.unnest\",\"long\")", ColumnType.LONG)) - .intervals(querySegmentSpec(Filtration.eternity())) - .columns("j0.unnest", "long", "v0") - .columnTypes(ColumnType.LONG, ColumnType.LONG, ColumnType.LONG) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .context(QUERY_CONTEXT_DEFAULT) - .build() + .dataSource( + UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ALL), + new NestedFieldVirtualColumn("arrayNestedLong", "$[1]", "j0.unnest", ColumnType.LONG_ARRAY), + null + ) + ) + .virtualColumns(expressionVirtualColumn("v0", "nvl(\"j0.unnest\",\"long\")", ColumnType.LONG)) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("j0.unnest", "long", "v0") + .columnTypes(ColumnType.LONG, ColumnType.LONG, ColumnType.LONG) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() ) ) .expectedResults( @@ -6434,10 +6399,10 @@ public void testCoalesceOnNestedColumns() ) .expectedSignature( RowSignature.builder() - .add("c", ColumnType.LONG) - .add("long", ColumnType.LONG) - .add("col", ColumnType.LONG) - .build() + .add("c", ColumnType.LONG) + .add("long", ColumnType.LONG) + .add("col", ColumnType.LONG) + .build() ) .run(); } @@ -6597,14 +6562,14 @@ public void testJsonQueryArrays() ImmutableList.of( new Object[]{"[{\"x\":1000},{\"y\":2000}]"}, new Object[]{"[{\"x\":1},{\"x\":2}]"}, - new Object[]{"[null,{\"x\":2}]"}, + new Object[]{"[{\"x\":null},{\"x\":2}]"}, new Object[]{"[{\"a\":1},{\"b\":2}]"}, new Object[]{"[{\"x\":1},{\"x\":2}]"}, new Object[]{"[null,{\"x\":2}]"}, new Object[]{"[{\"x\":3},{\"x\":4}]"}, new Object[]{"[{\"x\":1000},{\"y\":2000}]"}, new Object[]{"[{\"x\":1},{\"x\":2}]"}, - new Object[]{"[null,{\"x\":2}]"}, + new Object[]{"[{\"x\":null},{\"x\":2}]"}, new Object[]{"[{\"a\":1},{\"b\":2}]"}, new Object[]{"[{\"x\":1},{\"x\":2}]"}, new Object[]{"[null,{\"x\":2}]"}, @@ -6693,7 +6658,7 @@ public void testUnnestJsonQueryArrays() new Object[]{"{\"y\":2000}"}, new Object[]{"{\"x\":1}"}, new Object[]{"{\"x\":2}"}, - new Object[]{null}, + new Object[]{"{\"x\":null}"}, new Object[]{"{\"x\":2}"}, new Object[]{"{\"a\":1}"}, new Object[]{"{\"b\":2}"}, @@ -6707,7 +6672,7 @@ public void testUnnestJsonQueryArrays() new Object[]{"{\"y\":2000}"}, new Object[]{"{\"x\":1}"}, new Object[]{"{\"x\":2}"}, - new Object[]{null}, + new Object[]{"{\"x\":null}"}, new Object[]{"{\"x\":2}"}, new Object[]{"{\"a\":1}"}, new Object[]{"{\"b\":2}"}, From 5209d25e44285ee8f720659ee8de63d3498d751d Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 2 Oct 2025 14:26:54 -0700 Subject: [PATCH 09/19] serializer --- ...ssedVariableSizedBlobColumnSerializer.java | 77 +++++++------------ ...ressedVariableSizedBlobColumnSupplier.java | 30 ++------ .../CompressedNestedDataComplexColumn.java | 22 +++--- .../nested/NestedDataColumnSerializer.java | 30 +++++--- .../NestedFieldDictionaryEncodedColumn.java | 18 +++++ .../CompressedComplexColumnSerializer.java | 5 +- .../CompressedVariableSizeBlobColumnTest.java | 53 ------------- .../resources/nested-array-test-data.json | 4 +- 8 files changed, 86 insertions(+), 153 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSerializer.java index fcc661a4bff7..6693daa4326a 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSerializer.java @@ -19,11 +19,9 @@ package org.apache.druid.segment.data; -import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; import org.apache.druid.segment.CompressedPools; -import org.apache.druid.segment.nested.ObjectStorageEncoding; import org.apache.druid.segment.serde.MetaSerdeHelper; import org.apache.druid.segment.serde.Serializer; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; @@ -42,7 +40,6 @@ public class CompressedVariableSizedBlobColumnSerializer implements Serializer private final String offsetsFile; private final String blobsFile; private final SegmentWriteOutMedium segmentWriteOutMedium; - private final ObjectStorageEncoding objectStorageEncoding; private final CompressionStrategy compression; private int numValues; @@ -54,7 +51,6 @@ public class CompressedVariableSizedBlobColumnSerializer implements Serializer public CompressedVariableSizedBlobColumnSerializer( final String filenameBase, final SegmentWriteOutMedium segmentWriteOutMedium, - final ObjectStorageEncoding objectStorageEncoding, final CompressionStrategy compression ) { @@ -62,7 +58,6 @@ public CompressedVariableSizedBlobColumnSerializer( this.offsetsFile = getCompressedOffsetsFileName(filenameBase); this.blobsFile = getCompressedBlobsFileName(filenameBase); this.segmentWriteOutMedium = segmentWriteOutMedium; - this.objectStorageEncoding = objectStorageEncoding; this.compression = compression; this.numValues = 0; } @@ -71,40 +66,28 @@ public void open() throws IOException { numValues = 0; currentOffset = 0; - if (ObjectStorageEncoding.SMILE.equals(objectStorageEncoding)) { - offsetsSerializer = new CompressedLongsSerializer( - segmentWriteOutMedium, - compression, - segmentWriteOutMedium.getCloser() - ); - offsetsSerializer.open(); - - valuesSerializer = new CompressedBlockSerializer( - segmentWriteOutMedium, - compression, - CompressedPools.BUFFER_SIZE, - segmentWriteOutMedium.getCloser() - ); - valuesSerializer.open(); - } else if (ObjectStorageEncoding.NONE.equals(objectStorageEncoding)) { - // skip skip serialize - } else { - throw DruidException.defensive("unreachable"); - } - + offsetsSerializer = new CompressedLongsSerializer( + segmentWriteOutMedium, + compression, + segmentWriteOutMedium.getCloser() + ); + offsetsSerializer.open(); + + valuesSerializer = new CompressedBlockSerializer( + segmentWriteOutMedium, + compression, + CompressedPools.BUFFER_SIZE, + segmentWriteOutMedium.getCloser() + ); + valuesSerializer.open(); } public void addValue(byte[] bytes) throws IOException { - if (ObjectStorageEncoding.SMILE.equals(objectStorageEncoding)) { - valuesSerializer.addValue(bytes); - currentOffset += bytes.length; - offsetsSerializer.add(currentOffset); - } else if (ObjectStorageEncoding.NONE.equals(objectStorageEncoding)) { - // skip serialize - } else { - throw DruidException.defensive("unreachable"); - } + valuesSerializer.addValue(bytes); + + currentOffset += bytes.length; + offsetsSerializer.add(currentOffset); numValues++; if (numValues < 0) { throw new ColumnCapacityExceededException(filenameBase); @@ -113,15 +96,9 @@ public void addValue(byte[] bytes) throws IOException public void addValue(ByteBuffer bytes) throws IOException { - if (ObjectStorageEncoding.SMILE.equals(objectStorageEncoding)) { - currentOffset += bytes.remaining(); - valuesSerializer.addValue(bytes); - offsetsSerializer.add(currentOffset); - } else if (ObjectStorageEncoding.NONE.equals(objectStorageEncoding)) { - // skip serialize - } else { - throw DruidException.defensive("unreachable"); - } + currentOffset += bytes.remaining(); + valuesSerializer.addValue(bytes); + offsetsSerializer.add(currentOffset); numValues++; if (numValues < 0) { throw new ColumnCapacityExceededException(filenameBase); @@ -139,13 +116,11 @@ public long getSerializedSize() public void writeTo(WritableByteChannel channel, FileSmoosher smoosher) throws IOException { META_SERDE_HELPER.writeTo(channel, this); - if (ObjectStorageEncoding.SMILE.equals(objectStorageEncoding)) { - try (SmooshedWriter sub = smoosher.addWithSmooshedWriter(offsetsFile, offsetsSerializer.getSerializedSize())) { - offsetsSerializer.writeTo(sub, smoosher); - } - try (SmooshedWriter sub = smoosher.addWithSmooshedWriter(blobsFile, valuesSerializer.getSerializedSize())) { - valuesSerializer.writeTo(sub, smoosher); - } + try (SmooshedWriter sub = smoosher.addWithSmooshedWriter(offsetsFile, offsetsSerializer.getSerializedSize())) { + offsetsSerializer.writeTo(sub, smoosher); + } + try (SmooshedWriter sub = smoosher.addWithSmooshedWriter(blobsFile, valuesSerializer.getSerializedSize())) { + valuesSerializer.writeTo(sub, smoosher); } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java index 10b199716f43..17ef19f7f41f 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/data/CompressedVariableSizedBlobColumnSupplier.java @@ -22,7 +22,6 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; -import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -76,12 +75,12 @@ public static CompressedVariableSizedBlobColumnSupplier fromByteBuffer( private final int numElements; - @Nullable private final Supplier offsetReaderSupplier; - @Nullable private final Supplier blockDataReaderSupplier; + private final Supplier offsetReaderSupplier; + private final Supplier blockDataReaderSupplier; private CompressedVariableSizedBlobColumnSupplier( - @Nullable ByteBuffer offsetsBuffer, - @Nullable ByteBuffer dataBuffer, + ByteBuffer offsetsBuffer, + ByteBuffer dataBuffer, ByteOrder compressionOrder, ByteOrder valueOrder, int numElements, @@ -89,34 +88,17 @@ private CompressedVariableSizedBlobColumnSupplier( ) { this.numElements = numElements; - this.offsetReaderSupplier = offsetsBuffer == null - ? null - : CompressedLongsReader.fromByteBuffer(offsetsBuffer, compressionOrder); - this.blockDataReaderSupplier = dataBuffer == null - ? null - : CompressedBlockReader.fromByteBuffer( - dataBuffer, - compressionOrder, - valueOrder, - copyValuesOnRead - ); + this.offsetReaderSupplier = CompressedLongsReader.fromByteBuffer(offsetsBuffer, compressionOrder); + this.blockDataReaderSupplier = CompressedBlockReader.fromByteBuffer(dataBuffer, compressionOrder, valueOrder, copyValuesOnRead); } @Override public CompressedVariableSizedBlobColumn get() { - if (offsetReaderSupplier == null || blockDataReaderSupplier == null) { - return null; - } return new CompressedVariableSizedBlobColumn( numElements, offsetReaderSupplier.get(), blockDataReaderSupplier.get() ); } - - int getNumElements() - { - return numElements; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index ba307b5ae63a..480c4dbf572b 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -329,7 +329,7 @@ public Object getRowValue(int rowNum) compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } - if (compressedRawColumn != null) { + if (compressedRawColumnSupplier != null) { final ByteBuffer valueBuffer = compressedRawColumn.get(rowNum); return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); } @@ -337,12 +337,16 @@ public Object getRowValue(int rowNum) ReadableOffset offset = new AtomicIntegerReadableOffset(new AtomicInteger(rowNum)); final List elements = getAllParsedFields().stream() - .map(pair -> StructuredDataBuilder.Element.of( - pair.rhs, - getColumnHolder(pair.lhs.fieldName, pair.lhs.fieldIndex).getColumn() - .makeColumnValueSelector(offset) - .getObject() - )) + .map(pair -> { + NestedFieldDictionaryEncodedColumn column = (NestedFieldDictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn(); + return StructuredDataBuilder.Element.of( + pair.rhs, + column.lookupObject(rowNum) + ); + }) .collect(Collectors.toList()); return new StructuredDataBuilder(elements).build(); } @@ -386,7 +390,7 @@ public Object getObject() if (nullValues.get(offset.getOffset())) { return null; } - if (compressedRawColumn != null) { + if (compressedRawColumnSupplier != null) { final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); } @@ -485,7 +489,7 @@ private Object getForOffset(int offset) // maybe someday can use bitmap batch operations for nulls? return null; } - if (compressedRawColumn != null) { + if (compressedRawColumnSupplier != null) { final ByteBuffer valueBuffer = compressedRawColumn.get(offset); return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); } else { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java index d29ff3367e70..bae670f05545 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java @@ -142,7 +142,7 @@ public ProcessedValue processArrayField( private FixedIndexedWriter longDictionaryWriter; private FixedIndexedWriter doubleDictionaryWriter; private FrontCodedIntArrayIndexedWriter arrayDictionaryWriter; - private CompressedVariableSizedBlobColumnSerializer rawWriter; + @Nullable private CompressedVariableSizedBlobColumnSerializer rawWriter; private ByteBufferWriter nullBitmapWriter; private MutableBitmap nullRowsBitmap; private Map> fieldWriters; @@ -243,13 +243,19 @@ public void openDictionaryWriter(File segmentBaseDir) throws IOException @Override public void open() throws IOException { - rawWriter = new CompressedVariableSizedBlobColumnSerializer( - ColumnSerializerUtils.getInternalFileName(name, RAW_FILE_NAME), - segmentWriteOutMedium, - columnFormatSpec.getObjectStorageEncoding(), - columnFormatSpec.getObjectStorageCompression() - ); - rawWriter.open(); + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + rawWriter = null; + } else if (ObjectStorageEncoding.SMILE.equals(columnFormatSpec.getObjectStorageEncoding())) { + rawWriter = new CompressedVariableSizedBlobColumnSerializer( + ColumnSerializerUtils.getInternalFileName(name, RAW_FILE_NAME), + segmentWriteOutMedium, + columnFormatSpec.getObjectStorageCompression() + ); + rawWriter.open(); + } else { + throw DruidException.defensive("Unknown object storage encoding [%s]", columnFormatSpec.getObjectStorageEncoding() + ); + } nullBitmapWriter = new ByteBufferWriter<>( segmentWriteOutMedium, @@ -340,7 +346,9 @@ public void serialize(StructuredData data) throws IOException if (data == null) { nullRowsBitmap.add(rowCount); } - rawWriter.addValue(NestedDataComplexTypeSerde.INSTANCE.toBytes(data)); + if (rawWriter != null) { + rawWriter.addValue(NestedDataComplexTypeSerde.INSTANCE.toBytes(data)); + } if (data != null) { fieldProcessor.processFields(data.getValue()); } @@ -411,7 +419,9 @@ public void writeTo( writeInternal(smoosher, arrayDictionaryWriter, ColumnSerializerUtils.ARRAY_DICTIONARY_FILE_NAME); } } - writeInternal(smoosher, rawWriter, RAW_FILE_NAME); + if (rawWriter != null) { + writeInternal(smoosher, rawWriter, RAW_FILE_NAME); + } if (!nullRowsBitmap.isEmpty()) { writeInternal(smoosher, nullBitmapWriter, ColumnSerializerUtils.NULL_BITMAP_FILE_NAME); } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java index 11b21adbf9d2..261da5ddfc73 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java @@ -169,6 +169,23 @@ public String lookupName(int id) return null; } + public Object lookupObject(int id) + { + final int globalId = dictionary.get(id); + if (globalId < adjustArrayId) { + return lookupGlobalScalarObject(globalId); + } + int[] arr = globalArrayDictionary.get(globalId - adjustArrayId); + if (arr == null) { + return null; + } + final Object[] array = new Object[arr.length]; + for (int i = 0; i < arr.length; i++) { + array[i] = lookupGlobalScalarObject(arr[i]); + } + return array; + } + @Override public int lookupId(String name) { @@ -979,6 +996,7 @@ private void computeVectorsIfNeeded() @Nullable private PeekableIntIterator nullIterator = nullBitmap != null ? nullBitmap.peekableIterator() : null; private int offsetMark = -1; + @Override public double[] getDoubleVector() { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java index 1ffae8f3aee1..321cf8bc8b08 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/CompressedComplexColumnSerializer.java @@ -22,7 +22,6 @@ import com.google.common.base.Preconditions; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.collections.bitmap.MutableBitmap; -import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.GenericColumnSerializer; @@ -31,7 +30,6 @@ import org.apache.druid.segment.data.ByteBufferWriter; import org.apache.druid.segment.data.CompressedVariableSizedBlobColumnSerializer; import org.apache.druid.segment.data.ObjectStrategy; -import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; import java.io.ByteArrayOutputStream; @@ -87,8 +85,7 @@ public void open() throws IOException writer = new CompressedVariableSizedBlobColumnSerializer( ColumnSerializerUtils.getInternalFileName(name, FILE_NAME), segmentWriteOutMedium, - GuavaUtils.firstNonNull(indexSpec.getAutoColumnFormatSpec(), NestedCommonFormatColumnFormatSpec.DEFAULT) - .getObjectStorageEncoding(), indexSpec.getComplexMetricCompression() + indexSpec.getComplexMetricCompression() ); writer.open(); diff --git a/processing/src/test/java/org/apache/druid/segment/data/CompressedVariableSizeBlobColumnTest.java b/processing/src/test/java/org/apache/druid/segment/data/CompressedVariableSizeBlobColumnTest.java index 37a48b86f401..070440d203e1 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/CompressedVariableSizeBlobColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/CompressedVariableSizeBlobColumnTest.java @@ -23,7 +23,6 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; import org.apache.druid.segment.CompressedPools; -import org.apache.druid.segment.nested.ObjectStorageEncoding; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.junit.Assert; @@ -45,55 +44,6 @@ public class CompressedVariableSizeBlobColumnTest @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); - @Test - public void testNoneEncoding() throws IOException - { - // value sizes increase until they span at least 3 pages of compressed buffers - final File tmpFile = tempFolder.newFolder(); - final FileSmoosher smoosher = new FileSmoosher(tmpFile); - - final File tmpFile2 = tempFolder.newFolder(); - final SegmentWriteOutMedium writeOutMedium = - TmpFileSegmentWriteOutMediumFactory.instance().makeSegmentWriteOutMedium(tmpFile2); - - final String fileNameBase = "test"; - - final CompressionStrategy compressionStrategy = CompressionStrategy.LZ4; - CompressedVariableSizedBlobColumnSerializer serializer = new CompressedVariableSizedBlobColumnSerializer( - fileNameBase, - writeOutMedium, - ObjectStorageEncoding.NONE, - compressionStrategy - ); - serializer.open(); - - int numWritten = 0; - final Random r = ThreadLocalRandom.current(); - for (int i = 0; i < r.nextInt(10); i++) { - byte[] value = new byte[r.nextInt(5)]; - serializer.addValue(value); - numWritten++; - } - - SmooshedWriter writer = smoosher.addWithSmooshedWriter(fileNameBase, serializer.getSerializedSize()); - serializer.writeTo(writer, smoosher); - writer.close(); - smoosher.close(); - SmooshedFileMapper fileMapper = SmooshedFileMapper.load(tmpFile); - - ByteBuffer base = fileMapper.mapFile(fileNameBase); - CompressedVariableSizedBlobColumnSupplier supplier = CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( - fileNameBase, - base, - ByteOrder.nativeOrder(), - ByteOrder.nativeOrder(), - fileMapper - ); - - Assert.assertEquals(numWritten, supplier.getNumElements()); - Assert.assertNull(supplier.get()); - } - @Test public void testSomeValues() throws IOException { @@ -111,7 +61,6 @@ public void testSomeValues() throws IOException CompressedVariableSizedBlobColumnSerializer serializer = new CompressedVariableSizedBlobColumnSerializer( fileNameBase, writeOutMedium, - ObjectStorageEncoding.SMILE, compressionStrategy ); serializer.open(); @@ -176,7 +125,6 @@ public void testSomeValuesByteBuffers() throws IOException CompressedVariableSizedBlobColumnSerializer serializer = new CompressedVariableSizedBlobColumnSerializer( fileNameBase, writeOutMedium, - ObjectStorageEncoding.SMILE, compressionStrategy ); serializer.open(); @@ -240,7 +188,6 @@ public void testSomeValuesByteBuffersBigEndian() throws IOException CompressedVariableSizedBlobColumnSerializer serializer = new CompressedVariableSizedBlobColumnSerializer( fileNameBase, writeOutMedium, - ObjectStorageEncoding.SMILE, compressionStrategy ); serializer.open(); diff --git a/processing/src/test/resources/nested-array-test-data.json b/processing/src/test/resources/nested-array-test-data.json index a7368aacb127..430fe165eac5 100644 --- a/processing/src/test/resources/nested-array-test-data.json +++ b/processing/src/test/resources/nested-array-test-data.json @@ -1,14 +1,14 @@ {"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b"], "arrayStringNulls": ["a", "b"], "arrayLong":[1, 2, 3], "arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2], "arrayNoType":[], "arrayNestedLong":[[1, 2, null], [3, 4]], "arrayObject":[{"x": 1},{"x":2}]} {"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b", "c"], "arrayStringNulls": [null, "b"], "arrayLong":[2, 3], "arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999, null, 5.5], "arrayVariant":[null, null, 2.2], "arrayNoType":[null], "arrayNestedLong":[null, [null], []], "arrayObject":[{"x": 3},{"x":4}]} {"timestamp": "2023-01-01T00:00:00", "arrayString": ["b", "c"], "arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4], "arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3], "arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1], "arrayNestedLong":[[1], null, [1, 2, 3]], "arrayObject":[null,{"x":2}]} -{"timestamp": "2023-01-01T00:00:00", "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayNoType":[], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[null,{"x":2}]} +{"timestamp": "2023-01-01T00:00:00", "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayNoType":[], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}]} {"timestamp": "2023-01-01T00:00:00", "arrayString": null, "arrayLong":[1, 2, 3], "arrayLongNulls":[], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":null, "arrayNoType":[], "arrayObject":[{"x": 1000},{"y":2000}]} {"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b"], "arrayStringNulls": null, "arrayLongNulls":[null, 2, 9], "arrayDouble":null, "arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2], "arrayNoType":[null, null], "arrayNestedLong":[[1], [1, 2, null]], "arrayObject":[{"a": 1},{"b":2}]} {"timestamp": "2023-01-01T00:00:00", "arrayStringNulls": ["a", "b"], "arrayLong":null, "arrayLongNulls":[2, 3], "arrayDoubleNulls":[null], "arrayVariant":null, "arrayNoType":[], "arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b"], "arrayStringNulls": [], "arrayLong":[1, 2, 3], "arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2], "arrayNoType":[], "arrayNestedLong":[[2, 3], [1, 5]], "arrayObject":[{"x": 1},{"x":2}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b", "c"], "arrayStringNulls": [null, "b"], "arrayLong":[2, 3], "arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999, null, 5.5], "arrayVariant":[null, null, 2.2], "arrayNoType":[], "arrayNestedLong":[null], "arrayObject":[{"x": 3},{"x":4}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": ["b", "c"], "arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4], "arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3], "arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1], "arrayNoType":[null], "arrayNestedLong":[[1], null, [1]], "arrayObject":[null,{"x":2}]} -{"timestamp": "2023-01-02T00:00:00", "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[null], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayNoType":[], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[null,{"x":2}]} +{"timestamp": "2023-01-02T00:00:00", "arrayString": ["d", "e"], "arrayStringNulls": ["b", "b"], "arrayLong":[1, 4], "arrayLongNulls":[null], "arrayDouble":[2.2, 3.3, 4.0], "arrayVariant":["a", "b", "c"], "arrayNoType":[], "arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": null, "arrayLong":[1, 2, 3], "arrayLongNulls":null, "arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[], "arrayNoType":[], "arrayObject":[{"x": 1000},{"y":2000}]} {"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b"], "arrayStringNulls": [null], "arrayLongNulls":[null, 2, 9], "arrayDouble":null, "arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2], "arrayNoType":null, "arrayNestedLong":[], "arrayObject":[{"a": 1},{"b":2}]} {"timestamp": "2023-01-02T00:00:00", "arrayStringNulls": ["a", "b"], "arrayLong":null, "arrayLongNulls":[2, 3], "arrayDoubleNulls":[null, 1.1], "arrayVariant":null, "arrayNoType":[], "arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}]} \ No newline at end of file From 9e9384b1b0029ec6f8e726df50f06850b9be801f Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 2 Oct 2025 15:33:56 -0700 Subject: [PATCH 10/19] supplier --- .../CompressedNestedDataComplexColumn.java | 36 +++++++++++-------- .../NestedCommonFormatColumnFormatSpec.java | 2 +- .../nested/NestedDataColumnSupplier.java | 18 ++++------ .../NestedCommonFormatColumnPartSerde.java | 2 +- .../nested/NestedDataColumnSupplierTest.java | 2 +- 5 files changed, 31 insertions(+), 29 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 480c4dbf572b..ae713a661113 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -137,7 +137,6 @@ public abstract class CompressedNestedDataComplexColumn columns = new ConcurrentHashMap<>(); private CompressedVariableSizedBlobColumn compressedRawColumn; - private ArrayField arrayField; public CompressedNestedDataComplexColumn( String columnName, @@ -393,14 +392,14 @@ public Object getObject() if (compressedRawColumnSupplier != null) { final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + } else { + List elements = + Objects.requireNonNull(fieldSelectors) + .stream() + .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); } - List elements = - Objects.requireNonNull(fieldSelectors) - .stream() - .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) - .collect(Collectors.toList()); - return new StructuredDataBuilder(elements).build(); - } @Override @@ -1273,16 +1272,23 @@ private List>> getAllParsedFields() private Field getBaseOrArrayFieldFromPath(List path) { TKeyDictionary fields = fieldsSupplier.get(); - List arrayPath = (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) - ? path.subList(0, path.size() - 1) - : null; + List> parsed = new ArrayList<>(fields.size()); for (int i = 0; i < fields.size(); i++) { String field = StringUtils.fromUtf8(fields.get(i)); - List parsed = parsePath(field); - if (parsed.equals(path)) { + parsed.add(parsePath(field)); + if (parsed.get(i).equals(path)) { return new BaseField(field, i); - } else if (parsed.equals(arrayPath)) { - return new ArrayField(new BaseField(field, i), ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex()); + } + } + if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { + List arrayPath = path.subList(0, path.size() - 1); + for (int i = 0; i < fields.size(); i++) { + if (parsed.get(i).equals(arrayPath)) { + return new ArrayField( + new BaseField(StringUtils.fromUtf8(fields.get(i)), i), + ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex() + ); + } } } return null; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java index 343012e94c14..592e759b72ed 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java @@ -40,7 +40,7 @@ */ public class NestedCommonFormatColumnFormatSpec { - public static final NestedCommonFormatColumnFormatSpec DEFAULT = + private static final NestedCommonFormatColumnFormatSpec DEFAULT = NestedCommonFormatColumnFormatSpec.builder() .setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY) .setObjectStorageEncoding(ObjectStorageEncoding.SMILE) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java index 630cf7db2e05..816c7fe5f140 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java @@ -53,7 +53,7 @@ public static NestedDataColumnSupplier read( ByteBuffer bb, ColumnBuilder columnBuilder, ColumnConfig columnConfig, - NestedCommonFormatColumnFormatSpec nestedCommonFormatColumnFormatSpec, + BitmapSerdeFactory bitmapSerdeFactory, ByteOrder byteOrder, NestedDataColumnSupplier parent ) @@ -129,13 +129,12 @@ public static NestedDataColumnSupplier read( ); } - final ByteBuffer rawBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile( mapper, columnName, NestedCommonFormatColumnSerializer.RAW_FILE_NAME ); - compressedRawColumnSupplier = ObjectStorageEncoding.NONE.equals(nestedCommonFormatColumnFormatSpec.getObjectStorageEncoding()) + compressedRawColumnSupplier = rawBuffer == null ? null : CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( ColumnSerializerUtils.getInternalFileName( @@ -154,13 +153,9 @@ public static NestedDataColumnSupplier read( columnName, ColumnSerializerUtils.NULL_BITMAP_FILE_NAME ); - nullValues = nestedCommonFormatColumnFormatSpec.getBitmapEncoding() - .getObjectStrategy() - .fromByteBufferWithSize(nullIndexBuffer); + nullValues = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(nullIndexBuffer); } else { - nullValues = nestedCommonFormatColumnFormatSpec.getBitmapEncoding() - .getBitmapFactory() - .makeEmptyImmutableBitmap(); + nullValues = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap(); } return new NestedDataColumnSupplier( @@ -175,7 +170,7 @@ public static NestedDataColumnSupplier read( arrayDictionarySupplier, columnConfig, mapper, - nestedCommonFormatColumnFormatSpec.getBitmapEncoding(), + bitmapSerdeFactory, byteOrder, logicalType ); @@ -191,7 +186,8 @@ public static NestedDataColumnSupplier read( private final String columnName; private final Supplier> fieldSupplier; private final FieldTypeInfo fieldInfo; - @Nullable private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; + @Nullable + private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; private final ImmutableBitmap nullValues; private final Supplier> stringDictionarySupplier; private final Supplier> longDictionarySupplier; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java index ee74706e7904..fa74a2d98c32 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java @@ -346,7 +346,7 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo buffer, builder, columnConfig, - formatSpec, + formatSpec != null ? formatSpec.getBitmapEncoding() : bitmapSerdeFactory, byteOrder, parent == null ? null : (NestedDataColumnSupplier) parent.getColumnSupplier() ); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 5f4beb266081..39109f5ed049 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -349,7 +349,7 @@ public void testConcurrency() throws ExecutionException, InterruptedException baseBuffer, bob, ColumnConfig.SELECTION_SIZE, - columnFormatSpec, + bitmapSerdeFactory, ByteOrder.nativeOrder(), null ); From 4b0fbd0b3f63a93fd85cca2bd7d21495ed8df070 Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 2 Oct 2025 16:36:06 -0700 Subject: [PATCH 11/19] value-provider --- .../CompressedNestedDataComplexColumn.java | 152 +++++++++--------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index ae713a661113..4ba00ea1b06d 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -88,7 +88,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -184,7 +183,7 @@ public CompressedNestedDataComplexColumn( public SortedMap getFieldTypeInfo() { SortedMap fieldMap = new TreeMap<>(); - for (BaseField field : getAllFields()) { + for (BaseField field : getAllBaseFields()) { FieldTypeInfo.TypeSet types = fieldInfo.getTypes(field.fieldIndex); fieldMap.put(field.fieldName, new FieldTypeInfo.MutableTypeSet(types.getByteValue())); } @@ -200,7 +199,7 @@ public ColumnType getLogicalType() @Override public List> getNestedFields() { - return ImmutableList.copyOf(getAllParsedFields().stream().map(pair -> pair.rhs).collect(Collectors.toList())); + return getAllParsedBaseFields().stream().map(pair -> pair.rhs).collect(Collectors.toList()); } public TStringDictionary getUtf8BytesDictionary() @@ -333,27 +332,23 @@ public Object getRowValue(int rowNum) return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); } - ReadableOffset offset = new AtomicIntegerReadableOffset(new AtomicInteger(rowNum)); - final List elements = - getAllParsedFields().stream() - .map(pair -> { - NestedFieldDictionaryEncodedColumn column = (NestedFieldDictionaryEncodedColumn) getColumnHolder( - pair.lhs.fieldName, - pair.lhs.fieldIndex - ).getColumn(); - return StructuredDataBuilder.Element.of( - pair.rhs, - column.lookupObject(rowNum) - ); - }) - .collect(Collectors.toList()); + final List elements = getAllParsedBaseFields() + .stream() + .map(pair -> { + NestedFieldDictionaryEncodedColumn column = (NestedFieldDictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn(); + return StructuredDataBuilder.Element.of(pair.rhs, column.lookupObject(rowNum)); + }) + .collect(Collectors.toList()); return new StructuredDataBuilder(elements).build(); } @Override public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) { - List allFields = getAllFields(); + List allFields = getAllBaseFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) && allFields.size() == 1 && rootFieldPath.equals(Iterables.getOnlyElement(allFields).fieldName)) { @@ -363,23 +358,34 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) offset ); } - if (compressedRawColumn == null && compressedRawColumnSupplier != null) { - compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + final Supplier valueProvider; + if (compressedRawColumnSupplier != null) { + if (compressedRawColumn == null) { + compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + } + valueProvider = () -> { + final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); + return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + }; + } else { + List, ColumnValueSelector>> fieldSelectors = + getAllParsedBaseFields().stream() + .map(pair -> Pair.of( + pair.rhs, + ((DictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn()).makeColumnValueSelector(offset) + )) + .collect(Collectors.toList()); + valueProvider = () -> { + List elements = fieldSelectors + .stream() + .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); + }; } - final List, ColumnValueSelector>> fieldSelectors = - compressedRawColumnSupplier != null - ? null - : getAllParsedFields().stream() - .map(pair -> Pair.of( - pair.rhs, - ((DictionaryEncodedColumn) getColumnHolder( - pair.lhs.fieldName, - pair.lhs.fieldIndex - ).getColumn()).makeColumnValueSelector(offset) - )) - .collect(Collectors.toList()); - - return new ObjectColumnSelector() { @Nullable @@ -389,17 +395,7 @@ public Object getObject() if (nullValues.get(offset.getOffset())) { return null; } - if (compressedRawColumnSupplier != null) { - final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); - return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); - } else { - List elements = - Objects.requireNonNull(fieldSelectors) - .stream() - .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) - .collect(Collectors.toList()); - return new StructuredDataBuilder(elements).build(); - } + return valueProvider.get(); } @Override @@ -419,7 +415,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) { - List>> allFields = getAllParsedFields(); + List>> allFields = getAllParsedBaseFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) && allFields.size() == 1 && rootFieldPath.equals(Iterables.getOnlyElement(allFields).lhs.fieldName)) { @@ -430,22 +426,36 @@ public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset ); } - if (compressedRawColumn == null && compressedRawColumnSupplier != null) { - compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + AtomicInteger atomicOffset = new AtomicInteger(-1); + final Supplier valueProvider; + if (compressedRawColumnSupplier != null) { + if (compressedRawColumn == null) { + compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + } + valueProvider = () -> { + final ByteBuffer valueBuffer = compressedRawColumn.get(atomicOffset.get()); + return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + }; + } else { + AtomicIntegerReadableOffset readableAtomicOffset = new AtomicIntegerReadableOffset(atomicOffset); + final List, ColumnValueSelector>> fieldSelectors = + allFields.stream() + .map(pair -> Pair.of( + pair.rhs, + ((DictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn()).makeColumnValueSelector(readableAtomicOffset) + )) + .collect(Collectors.toList()); + valueProvider = () -> { + List elements = fieldSelectors + .stream() + .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); + }; } - AtomicInteger rowNumber = new AtomicInteger(-1); - AtomicIntegerReadableOffset atomicOffset = new AtomicIntegerReadableOffset(rowNumber); - final List, ColumnValueSelector>> fieldSelectors = - compressedRawColumnSupplier != null ? null : - allFields.stream() - .map(pair -> Pair.of( - pair.rhs, - ((DictionaryEncodedColumn) Objects.requireNonNull(getColumnHolder( - pair.lhs.fieldName, - pair.lhs.fieldIndex - )).getColumn()).makeColumnValueSelector(atomicOffset) - )) - .collect(Collectors.toList()); return new VectorObjectSelector() { @@ -488,17 +498,8 @@ private Object getForOffset(int offset) // maybe someday can use bitmap batch operations for nulls? return null; } - if (compressedRawColumnSupplier != null) { - final ByteBuffer valueBuffer = compressedRawColumn.get(offset); - return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); - } else { - rowNumber.set(offset); - List elements = fieldSelectors - .stream() - .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) - .collect(Collectors.toList()); - return new StructuredDataBuilder(elements).build(); - } + atomicOffset.set(offset); + return valueProvider.get(); } @Override @@ -518,7 +519,7 @@ public int getMaxVectorSize() @Override public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) { - List allFields = getAllFields(); + List allFields = getAllBaseFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) && allFields.size() == 1 && rootFieldPath.equals(Iterables.getOnlyElement(allFields).fieldName)) { @@ -1005,7 +1006,6 @@ public ColumnType getFieldLogicalType(List path) } return null; - } @Nullable @@ -1246,7 +1246,7 @@ public int compare(Object o1, Object o2) } } - private List getAllFields() + private List getAllBaseFields() { TKeyDictionary fields = fieldsSupplier.get(); List allFields = new ArrayList<>(fields.size()); @@ -1257,7 +1257,7 @@ private List getAllFields() return allFields; } - private List>> getAllParsedFields() + private List>> getAllParsedBaseFields() { TKeyDictionary fields = fieldsSupplier.get(); List>> allFields = new ArrayList<>(fields.size()); From 5d03ac8c347f0e751c598a527216228e4afa2d18 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 7 Oct 2025 14:22:06 -0700 Subject: [PATCH 12/19] test --- .../druid/query/NestedDataTestUtils.java | 54 ++++---- .../query/scan/NestedDataScanQueryTest.java | 32 ++++- .../calcite/CalciteNestedDataQueryTest.java | 115 +++++++++++------- 3 files changed, 121 insertions(+), 80 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index d41a51f2ea8d..63280ac9eb10 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -27,6 +27,7 @@ import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.data.input.impl.DelimitedInputFormat; +import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.data.input.impl.TimestampSpec; @@ -39,6 +40,7 @@ import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.segment.AutoTypeColumnSchema; +import org.apache.druid.segment.DefaultColumnFormatConfig; import org.apache.druid.segment.IncrementalIndexSegment; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.IndexSpec; @@ -49,6 +51,7 @@ import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.column.StringEncodingStrategy; import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.timeline.SegmentId; @@ -66,6 +69,7 @@ import java.util.Collections; import java.util.List; import java.util.function.BiFunction; +import java.util.stream.Collectors; public class NestedDataTestUtils { @@ -91,34 +95,15 @@ public class NestedDataTestUtils DimensionsSpec.builder() .useSchemaDiscovery(true) .build(); + private static final List COLUMN_NAMES = Arrays.asList( + "dim", + "nest_json", + "nester_json", + "variant_json", + "list_json", + "nonexistent" + ); - public static final DimensionsSpec TSV_SCHEMA = - DimensionsSpec.builder() - .setDimensions( - Arrays.asList( - AutoTypeColumnSchema.of("dim"), - AutoTypeColumnSchema.of("nest_json"), - AutoTypeColumnSchema.of("nester_json"), - AutoTypeColumnSchema.of("variant_json"), - AutoTypeColumnSchema.of("list_json"), - AutoTypeColumnSchema.of("nonexistent") - ) - ) - .build(); - - public static final DimensionsSpec TSV_NESTED_SCHEMA = - DimensionsSpec.builder() - .setDimensions( - Arrays.asList( - new NestedDataColumnSchema("dim", 5), - new NestedDataColumnSchema("nest_json", 5), - new NestedDataColumnSchema("nester_json", 5), - new NestedDataColumnSchema("variant_json", 5), - new NestedDataColumnSchema("list_json", 5), - new NestedDataColumnSchema("nonexistent", 5) - ) - ) - .build(); public static final InputRowSchema AUTO_SCHEMA = new InputRowSchema( TIMESTAMP_SPEC, AUTO_DISCOVERY, @@ -163,30 +148,41 @@ public class NestedDataTestUtils public static List createSimpleSegmentsTsv( TemporaryFolder tempFolder, + NestedCommonFormatColumnFormatSpec spec, Closer closer ) throws Exception { + List dimensionsSpecs = + COLUMN_NAMES.stream() + .map(name -> (DimensionSchema) new AutoTypeColumnSchema(name, null, spec)) + .collect(Collectors.toList()); return createSimpleNestedTestDataTsvSegments( tempFolder, closer, Granularities.NONE, - TSV_SCHEMA, + DimensionsSpec.builder().setDimensions(dimensionsSpecs).build(), true ); } public static List createSimpleSegmentsTsvNested( TemporaryFolder tempFolder, + NestedCommonFormatColumnFormatSpec spec, Closer closer ) throws Exception { + DefaultColumnFormatConfig config = new DefaultColumnFormatConfig(null, null, null); + List dimensionsSpecs = + COLUMN_NAMES.stream() + .map(name -> (DimensionSchema) new NestedDataColumnSchema(name, 5, spec, config)) + .collect(Collectors.toList()); return createSimpleNestedTestDataTsvSegments( tempFolder, closer, Granularities.NONE, - TSV_NESTED_SCHEMA, + DimensionsSpec.builder().setDimensions(dimensionsSpecs).build(), true ); } diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index e297cedc425b..645b4f2840a0 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -22,6 +22,9 @@ import com.fasterxml.jackson.databind.Module; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import junitparams.JUnitParamsRunner; +import junitparams.Parameters; +import junitparams.naming.TestCaseName; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.guice.BuiltInTypesModule; import org.apache.druid.java.util.common.Intervals; @@ -46,6 +49,8 @@ import org.apache.druid.segment.Segment; import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; +import org.apache.druid.segment.nested.ObjectStorageEncoding; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -54,11 +59,13 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; import java.io.IOException; import java.util.Collections; import java.util.List; +@RunWith(JUnitParamsRunner.class) public class NestedDataScanQueryTest extends InitializedNullHandlingTest { private static final Logger LOG = new Logger(NestedDataScanQueryTest.class); @@ -69,6 +76,18 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); + public static Object[] getNestedColumnFormatSpec() + { + return new Object[]{ + new Object[]{"default", null}, + new Object[]{ + "noneObjectStorageEncoding", + NestedCommonFormatColumnFormatSpec.builder() + .setObjectStorageEncoding(ObjectStorageEncoding.NONE).build() + } + }; + } + @After public void teardown() throws IOException { @@ -267,7 +286,9 @@ public void testIngestAndScanSegmentsRealtimeWithFallback() throws Exception } @Test - public void testIngestAndScanSegmentsTsvV4() throws Exception + @Parameters(method = "getNestedColumnFormatSpec") + @TestCaseName("{0}") + public void testIngestAndScanSegmentsTsvV4(String name, NestedCommonFormatColumnFormatSpec spec) throws Exception { Query scanQuery = Druids.newScanQueryBuilder() .dataSource("test_datasource") @@ -285,7 +306,7 @@ public void testIngestAndScanSegmentsTsvV4() throws Exception .limit(100) .context(ImmutableMap.of()) .build(); - List segs = NestedDataTestUtils.createSimpleSegmentsTsvNested(tempFolder, closer); + List segs = NestedDataTestUtils.createSimpleSegmentsTsvNested(tempFolder, spec, closer); final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); @@ -295,9 +316,10 @@ public void testIngestAndScanSegmentsTsvV4() throws Exception logResults(results); } - @Test - public void testIngestAndScanSegmentsTsv() throws Exception + @Parameters(method = "getNestedColumnFormatSpec") + @TestCaseName("{0}") + public void testIngestAndScanSegmentsTsv(String name, NestedCommonFormatColumnFormatSpec spec) throws Exception { Query scanQuery = Druids.newScanQueryBuilder() .dataSource("test_datasource") @@ -315,7 +337,7 @@ public void testIngestAndScanSegmentsTsv() throws Exception .limit(100) .context(ImmutableMap.of()) .build(); - List segs = NestedDataTestUtils.createSimpleSegmentsTsv(tempFolder, closer); + List segs = NestedDataTestUtils.createSimpleSegmentsTsv(tempFolder, spec, closer); final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 97db549de27f..294815af5e8a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -64,7 +64,9 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; import org.apache.druid.segment.nested.NestedPathField; +import org.apache.druid.segment.nested.ObjectStorageEncoding; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; @@ -103,7 +105,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .put("t", "2000-01-01") .put("string", "aaa") .put("string_sparse", "zzz") - .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "300", "mixed", 1L, "mixed2", "1")) + .put("nest", ImmutableMap.of("mixed", 1L, "mixed2", "1", "x", 100L, "y", 2.02, "z", "300")) .put( "nester", ImmutableMap.of("array", ImmutableList.of("a", "b"), "n", ImmutableMap.of("x", "hello")) @@ -120,7 +122,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .put("t", "2000-01-01") .put("string", "ccc") .put("string_sparse", "10") - .put("nest", ImmutableMap.of("x", 200L, "y", 3.03, "z", "abcdef", "mixed", 1.1, "mixed2", 1L)) + .put("nest", ImmutableMap.of("mixed", 1.1, "mixed2", 1L, "x", 200L, "y", 3.03, "z", "abcdef")) .put("long", 3L) .build(), ImmutableMap.builder() @@ -138,7 +140,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ImmutableMap.builder() .put("t", "2000-01-02") .put("string", "aaa") - .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "400", "mixed2", 1.1)) + .put("nest", ImmutableMap.of("mixed2", 1.1, "x", 100L, "y", 2.02, "z", "400")) .put("nester", ImmutableMap.of("array", ImmutableList.of("a", "b"), "n", ImmutableMap.of("x", 1L))) .put("long", 5L) .build(), @@ -150,15 +152,18 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ); + private static final NestedCommonFormatColumnFormatSpec NONE_OBJECT_STORAGE = + NestedCommonFormatColumnFormatSpec.builder().setObjectStorageEncoding(ObjectStorageEncoding.NONE).build(); + public static final InputRowSchema ALL_JSON_COLUMNS = new InputRowSchema( new TimestampSpec("t", "iso", null), DimensionsSpec.builder().setDimensions( ImmutableList.builder() - .add(AutoTypeColumnSchema.of("string")) - .add(AutoTypeColumnSchema.of("nest")) - .add(AutoTypeColumnSchema.of("nester")) - .add(AutoTypeColumnSchema.of("long")) - .add(AutoTypeColumnSchema.of("string_sparse")) + .add(new AutoTypeColumnSchema("string", null, NONE_OBJECT_STORAGE)) + .add(new AutoTypeColumnSchema("nest", null, NONE_OBJECT_STORAGE)) + .add(new AutoTypeColumnSchema("nester", null, NONE_OBJECT_STORAGE)) + .add(new AutoTypeColumnSchema("long", null, NONE_OBJECT_STORAGE)) + .add(new AutoTypeColumnSchema("string_sparse", null, NONE_OBJECT_STORAGE)) .build() ).build(), null @@ -169,8 +174,8 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest DimensionsSpec.builder().setDimensions( ImmutableList.builder() .add(new StringDimensionSchema("string")) - .add(AutoTypeColumnSchema.of("nest")) - .add(AutoTypeColumnSchema.of("nester")) + .add(new AutoTypeColumnSchema("nest", null, NONE_OBJECT_STORAGE)) + .add(new AutoTypeColumnSchema("nester", null, NONE_OBJECT_STORAGE)) .add(new LongDimensionSchema("long")) .add(new StringDimensionSchema("string_sparse")) .build() @@ -582,7 +587,12 @@ public void testGroupByOnNestedColumn() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) @@ -611,7 +621,12 @@ public void testGroupByOnNestedColumnWithOrderBy() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) @@ -1163,7 +1178,12 @@ public void testJsonValueArrays() new NestedFieldVirtualColumn("arrayNestedLong", "$[0]", "v3", ColumnType.LONG_ARRAY) ) .columns("v0", "v1", "v2", "v3") - .columnTypes(ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.LONG_ARRAY) + .columnTypes( + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.LONG_ARRAY + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ) @@ -1669,23 +1689,23 @@ public void testGroupByRootSingleTypeArrayLongNullsUnnest() .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) .expectedQuery( GroupByQuery.builder() - .setDataSource( - UnnestDataSource.create( - TableDataSource.create(DATA_SOURCE_ARRAYS), - expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), - null - ) - ) - .setInterval(querySegmentSpec(Filtration.eternity())) - .setGranularity(Granularities.ALL) - .setDimensions( - dimensions( - new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG) - ) - ) - .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) - .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) - .build() + .setDataSource( + UnnestDataSource.create( + TableDataSource.create(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), + null + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() ) .expectedResults( ImmutableList.of( @@ -2753,7 +2773,7 @@ public void testJsonAndArrayAgg() ImmutableList.of( new Object[]{ "aaa", - "[{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"},{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}]", + "[{\"mixed\":1,\"mixed2\":\"1\",\"x\":100,\"y\":2.02,\"z\":\"300\"},{\"mixed2\":1.1,\"x\":100,\"y\":2.02,\"z\":\"400\"}]", 2L }, new Object[]{ @@ -2763,7 +2783,7 @@ public void testJsonAndArrayAgg() }, new Object[]{ "ccc", - "[{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}]", + "[{\"mixed\":1.1,\"mixed2\":1,\"x\":200,\"y\":3.03,\"z\":\"abcdef\"}]", 1L }, new Object[]{ @@ -4646,8 +4666,8 @@ public void testGroupByRootKeys2() ), ImmutableList.of( new Object[]{null, 4L}, - new Object[]{"[\"x\",\"y\",\"z\",\"mixed\",\"mixed2\"]", 2L}, - new Object[]{"[\"x\",\"y\",\"z\",\"mixed2\"]", 1L} + new Object[]{"[\"mixed\",\"mixed2\",\"x\",\"y\",\"z\"]", 2L}, + new Object[]{"[\"mixed2\",\"x\",\"y\",\"z\"]", 1L} ), RowSignature.builder() .add("EXPR$0", ColumnType.STRING_ARRAY) @@ -4912,9 +4932,7 @@ public void testJsonMerging() "nest", "v1", ColumnType.STRING, - ImmutableList.of( - new NestedPathField("x") - ), + ImmutableList.of(new NestedPathField("x")), false, null, false @@ -5020,7 +5038,12 @@ public void testToJsonAndParseJson() ) ) .columns("string", "v0", "v1", "v2") - .columnTypes(ColumnType.STRING, ColumnType.NESTED_DATA, ColumnType.NESTED_DATA, ColumnType.NESTED_DATA) + .columnTypes( + ColumnType.STRING, + ColumnType.NESTED_DATA, + ColumnType.NESTED_DATA, + ColumnType.NESTED_DATA + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ), @@ -6317,9 +6340,9 @@ public void testFilterJsonIsNotNull() .build() ), ImmutableList.of( - new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"}"}, - new Object[]{"{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}"}, - new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}"} + new Object[]{"{\"mixed\":1,\"mixed2\":\"1\",\"x\":100,\"y\":2.02,\"z\":\"300\"}"}, + new Object[]{"{\"mixed\":1.1,\"mixed2\":1,\"x\":200,\"y\":3.03,\"z\":\"abcdef\"}"}, + new Object[]{"{\"mixed2\":1.1,\"x\":100,\"y\":2.02,\"z\":\"400\"}"} ), RowSignature.builder() .add("nest", ColumnType.NESTED_DATA) @@ -6364,7 +6387,7 @@ public void testCoalesceOnNestedColumns() testBuilder() .sql( "select c,long,coalesce(c,long) as col " - + " from druid.all_auto, unnest(json_value(arrayNestedLong, '$[1]' returning bigint array)) as u(c) " + + " from druid.all_auto, unnest(json_value(arrayNestedLong, '$[1]' returning bigint array)) as u(c) " ) .expectedQueries( ImmutableList.of( @@ -6399,10 +6422,10 @@ public void testCoalesceOnNestedColumns() ) .expectedSignature( RowSignature.builder() - .add("c", ColumnType.LONG) - .add("long", ColumnType.LONG) - .add("col", ColumnType.LONG) - .build() + .add("c", ColumnType.LONG) + .add("long", ColumnType.LONG) + .add("col", ColumnType.LONG) + .build() ) .run(); } From 6f97741223f2dc398cca43cff51eef8d37e00dc5 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 7 Oct 2025 14:48:32 -0700 Subject: [PATCH 13/19] javadoc --- .../concrete/ColumnHolderRACColumn.java | 25 +-- .../data/AtomicIntegerReadableOffset.java | 49 ++++ .../CompressedNestedDataComplexColumn.java | 212 +++++++++--------- 3 files changed, 153 insertions(+), 133 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/segment/data/AtomicIntegerReadableOffset.java diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java index 5e299f6aa98f..fe6b117653b2 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java @@ -19,14 +19,13 @@ package org.apache.druid.query.rowsandcols.concrete; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.query.rowsandcols.column.ColumnAccessor; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BaseColumnHolder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.data.AtomicIntegerReadableOffset; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -161,26 +160,4 @@ public BaseColumn getBaseColumn() } return baseColumn; } - - private static class AtomicIntegerReadableOffset implements ReadableOffset - { - private final AtomicInteger offset; - - public AtomicIntegerReadableOffset(AtomicInteger offset) - { - this.offset = offset; - } - - @Override - public int getOffset() - { - return offset.get(); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - - } - } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/AtomicIntegerReadableOffset.java b/processing/src/main/java/org/apache/druid/segment/data/AtomicIntegerReadableOffset.java new file mode 100644 index 000000000000..e063c6b0e21b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/data/AtomicIntegerReadableOffset.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.data; + +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; + +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A {@link ReadableOffset} implementation that wraps an AtomicInteger. + */ +public class AtomicIntegerReadableOffset implements ReadableOffset +{ + private final AtomicInteger offset; + + public AtomicIntegerReadableOffset(AtomicInteger offset) + { + this.offset = offset; + } + + @Override + public int getOffset() + { + return offset.get(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 4ba00ea1b06d..e404512e66e6 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -51,6 +51,7 @@ import org.apache.druid.segment.column.StringEncodingStrategies; import org.apache.druid.segment.column.TypeStrategies; import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.data.AtomicIntegerReadableOffset; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarDoubles; import org.apache.druid.segment.data.ColumnarInts; @@ -183,7 +184,7 @@ public CompressedNestedDataComplexColumn( public SortedMap getFieldTypeInfo() { SortedMap fieldMap = new TreeMap<>(); - for (BaseField field : getAllBaseFields()) { + for (NestedField field : getAllNestedFields()) { FieldTypeInfo.TypeSet types = fieldInfo.getTypes(field.fieldIndex); fieldMap.put(field.fieldName, new FieldTypeInfo.MutableTypeSet(types.getByteValue())); } @@ -199,7 +200,7 @@ public ColumnType getLogicalType() @Override public List> getNestedFields() { - return getAllParsedBaseFields().stream().map(pair -> pair.rhs).collect(Collectors.toList()); + return getAllParsedNestedFields().stream().map(pair -> pair.rhs).collect(Collectors.toList()); } public TStringDictionary getUtf8BytesDictionary() @@ -332,7 +333,7 @@ public Object getRowValue(int rowNum) return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); } - final List elements = getAllParsedBaseFields() + final List elements = getAllParsedNestedFields() .stream() .map(pair -> { NestedFieldDictionaryEncodedColumn column = (NestedFieldDictionaryEncodedColumn) getColumnHolder( @@ -348,7 +349,7 @@ public Object getRowValue(int rowNum) @Override public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) { - List allFields = getAllBaseFields(); + List allFields = getAllNestedFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) && allFields.size() == 1 && rootFieldPath.equals(Iterables.getOnlyElement(allFields).fieldName)) { @@ -369,15 +370,15 @@ public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) }; } else { List, ColumnValueSelector>> fieldSelectors = - getAllParsedBaseFields().stream() - .map(pair -> Pair.of( - pair.rhs, - ((DictionaryEncodedColumn) getColumnHolder( - pair.lhs.fieldName, - pair.lhs.fieldIndex - ).getColumn()).makeColumnValueSelector(offset) - )) - .collect(Collectors.toList()); + getAllParsedNestedFields().stream() + .map(pair -> Pair.of( + pair.rhs, + ((DictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn()).makeColumnValueSelector(offset) + )) + .collect(Collectors.toList()); valueProvider = () -> { List elements = fieldSelectors .stream() @@ -415,7 +416,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) { - List>> allFields = getAllParsedBaseFields(); + List>> allFields = getAllParsedNestedFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) && allFields.size() == 1 && rootFieldPath.equals(Iterables.getOnlyElement(allFields).lhs.fieldName)) { @@ -519,7 +520,7 @@ public int getMaxVectorSize() @Override public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) { - List allFields = getAllBaseFields(); + List allFields = getAllNestedFields(); if (!logicalType.equals(ColumnType.NESTED_DATA) && allFields.size() == 1 && rootFieldPath.equals(Iterables.getOnlyElement(allFields).fieldName)) { @@ -563,15 +564,15 @@ public DimensionSelector makeDimensionSelector( ReadableOffset readableOffset ) { - final Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - ((BaseField) field).fieldName, - ((BaseField) field).fieldIndex + ((NestedField) field).fieldName, + ((NestedField) field).fieldIndex ).getColumn(); return col.makeDimensionSelector(readableOffset, extractionFn); - } else if (field instanceof ArrayField) { - final ArrayField arrayField = (ArrayField) field; + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; final int elementNumber = arrayField.elementNumber; if (elementNumber < 0) { throw new IAE( @@ -580,8 +581,8 @@ public DimensionSelector makeDimensionSelector( ); } DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField.baseField.fieldName, - arrayField.baseField.fieldIndex + arrayField.nestedField.fieldName, + arrayField.nestedField.fieldIndex ).getColumn(); ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); return new BaseSingleValueDimensionSelector() @@ -629,13 +630,13 @@ public ColumnValueSelector makeColumnValueSelector( ReadableOffset readableOffset ) { - Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { - final BaseField baseField = (BaseField) field; - return getColumnHolder(baseField.fieldName, baseField.fieldIndex).getColumn() - .makeColumnValueSelector(readableOffset); - } else if (field instanceof ArrayField) { - final ArrayField arrayField = (ArrayField) field; + Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getColumn() + .makeColumnValueSelector(readableOffset); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; final int elementNumber = arrayField.elementNumber; if (elementNumber < 0) { throw DruidException.forPersona(DruidException.Persona.USER) @@ -646,8 +647,8 @@ public ColumnValueSelector makeColumnValueSelector( ); } DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField.baseField.fieldName, - arrayField.baseField.fieldIndex + arrayField.nestedField.fieldName, + arrayField.nestedField.fieldIndex ).getColumn(); ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); return new ColumnValueSelector<>() @@ -718,12 +719,12 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector ReadableVectorOffset readableOffset ) { - final Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { - BaseField baseField = (BaseField) field; + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + NestedField nestedField = (NestedField) field; DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - baseField.fieldName, - baseField.fieldIndex + nestedField.fieldName, + nestedField.fieldIndex ).getColumn(); return col.makeSingleValueDimensionVectorSelector(readableOffset); } else { @@ -745,13 +746,13 @@ public VectorObjectSelector makeVectorObjectSelector( ReadableVectorOffset readableOffset ) { - final Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { - BaseField baseField = (BaseField) field; - return getColumnHolder(baseField.fieldName, baseField.fieldIndex).getColumn() - .makeVectorObjectSelector(readableOffset); - } else if (field instanceof ArrayField) { - final ArrayField arrayField = (ArrayField) field; + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getColumn() + .makeVectorObjectSelector(readableOffset); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; final int elementNumber = arrayField.elementNumber; if (elementNumber < 0) { throw DruidException.forPersona(DruidException.Persona.USER) @@ -762,8 +763,8 @@ public VectorObjectSelector makeVectorObjectSelector( ); } VectorObjectSelector arraySelector = getColumnHolder( - arrayField.baseField.fieldName, - arrayField.baseField.fieldIndex + arrayField.nestedField.fieldName, + arrayField.nestedField.fieldIndex ).getColumn().makeVectorObjectSelector(readableOffset); return new VectorObjectSelector() { @@ -958,12 +959,12 @@ public int getCurrentVectorSize() @Override public Set getFieldTypes(List path) { - final Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { - return FieldTypeInfo.convertToSet(fieldInfo.getTypes(((BaseField) field).fieldIndex).getByteValue()); - } else if (field instanceof ArrayField) { - final ArrayField arrayField = (ArrayField) field; - final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.baseField.fieldIndex) + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + return FieldTypeInfo.convertToSet(fieldInfo.getTypes(((NestedField) field).fieldIndex).getByteValue()); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.nestedField.fieldIndex) .getByteValue()); final Set elementTypes = Sets.newHashSetWithExpectedSize(arrayFieldTypes.size()); for (ColumnType type : arrayFieldTypes) { @@ -982,14 +983,14 @@ public Set getFieldTypes(List path) @Override public ColumnType getFieldLogicalType(List path) { - final Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { - final Set fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(((BaseField) field).fieldIndex) + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final Set fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(((NestedField) field).fieldIndex) .getByteValue()); return ColumnType.leastRestrictiveType(fieldTypes); - } else if (field instanceof ArrayField) { - final ArrayField arrayField = (ArrayField) field; - final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.baseField.fieldIndex) + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.nestedField.fieldIndex) .getByteValue()); ColumnType leastRestrictiveType = null; for (ColumnType type : arrayFieldTypes) { @@ -1012,10 +1013,10 @@ public ColumnType getFieldLogicalType(List path) @Override public BaseColumnHolder getColumnHolder(List path) { - final Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { - final BaseField baseField = (BaseField) field; - return getColumnHolder(baseField.fieldName, baseField.fieldIndex); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex); } return null; } @@ -1024,11 +1025,11 @@ public BaseColumnHolder getColumnHolder(List path) @Override public ColumnIndexSupplier getColumnIndexSupplier(List path) { - final Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { - final BaseField baseField = (BaseField) field; - return getColumnHolder(baseField.fieldName, baseField.fieldIndex).getIndexSupplier(); - } else if (field instanceof ArrayField) { + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getIndexSupplier(); + } else if (field instanceof NestedArrayElement) { return NoIndexesColumnIndexSupplier.getInstance(); } return null; @@ -1037,10 +1038,10 @@ public ColumnIndexSupplier getColumnIndexSupplier(List path) @Override public boolean isNumeric(List path) { - final Field field = getBaseOrArrayFieldFromPath(path); - if (field instanceof BaseField) { - final BaseField baseField = (BaseField) field; - return getColumnHolder(baseField.fieldName, baseField.fieldIndex).getCapabilities().isNumeric(); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getCapabilities().isNumeric(); } return true; } @@ -1246,30 +1247,36 @@ public int compare(Object o1, Object o2) } } - private List getAllBaseFields() + private List getAllNestedFields() { TKeyDictionary fields = fieldsSupplier.get(); - List allFields = new ArrayList<>(fields.size()); + List allFields = new ArrayList<>(fields.size()); for (int i = 0; i < fields.size(); i++) { String field = StringUtils.fromUtf8(fields.get(i)); - allFields.add(new BaseField(field, i)); + allFields.add(new NestedField(field, i)); } return allFields; } - private List>> getAllParsedBaseFields() + private List>> getAllParsedNestedFields() { TKeyDictionary fields = fieldsSupplier.get(); - List>> allFields = new ArrayList<>(fields.size()); + List>> allFields = new ArrayList<>(fields.size()); for (int i = 0; i < fields.size(); i++) { String field = StringUtils.fromUtf8(fields.get(i)); - allFields.add(Pair.of(new BaseField(field, i), parsePath(field))); + allFields.add(Pair.of(new NestedField(field, i), parsePath(field))); } return allFields; } + + /** + * Returns a representation of a field or array element within a nested object structure, given a path. + *

+ * Returns null if the path does not correspond to any field or array element. + */ @Nullable - private Field getBaseOrArrayFieldFromPath(List path) + private Field getNestedFieldOrNestedArrayElementFromPath(List path) { TKeyDictionary fields = fieldsSupplier.get(); List> parsed = new ArrayList<>(fields.size()); @@ -1277,15 +1284,15 @@ private Field getBaseOrArrayFieldFromPath(List path) String field = StringUtils.fromUtf8(fields.get(i)); parsed.add(parsePath(field)); if (parsed.get(i).equals(path)) { - return new BaseField(field, i); + return new NestedField(field, i); } } if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { List arrayPath = path.subList(0, path.size() - 1); for (int i = 0; i < fields.size(); i++) { if (parsed.get(i).equals(arrayPath)) { - return new ArrayField( - new BaseField(StringUtils.fromUtf8(fields.get(i)), i), + return new NestedArrayElement( + new NestedField(StringUtils.fromUtf8(fields.get(i)), i), ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex() ); } @@ -1294,53 +1301,40 @@ private Field getBaseOrArrayFieldFromPath(List path) return null; } + /** + * Represents a single target element within a nested object structure. + */ interface Field { } - private static class BaseField implements Field + /** + * Represents a field located within a nested object hierarchy, could be scalar or array. + */ + private static class NestedField implements Field { private final String fieldName; private final int fieldIndex; - BaseField(String fieldName, int fieldIndex) + NestedField(String fieldName, int fieldIndex) { this.fieldName = fieldName; this.fieldIndex = fieldIndex; } } - private static class ArrayField implements Field + /** + * Represents an element located within an array field inside a nested object hierarchy. + */ + private static class NestedArrayElement implements Field { - private final BaseField baseField; + private final NestedField nestedField; private final int elementNumber; - ArrayField(BaseField baseField, int elementNumber) + NestedArrayElement(NestedField nestedField, int elementNumber) { - this.baseField = baseField; + this.nestedField = nestedField; this.elementNumber = elementNumber; } } - - private static class AtomicIntegerReadableOffset implements ReadableOffset - { - private final AtomicInteger offset; - - AtomicIntegerReadableOffset(AtomicInteger offset) - { - this.offset = offset; - } - - @Override - public int getOffset() - { - return offset.get(); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - - } - } } From fa9cad09a1d1f4a47f74d3865d166e89dbaa1a92 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 7 Oct 2025 15:29:33 -0700 Subject: [PATCH 14/19] get-row-value --- .../CompressedNestedDataComplexColumn.java | 2 +- .../nested/NestedDataColumnSupplierTest.java | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index e404512e66e6..3d671daaaccf 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -340,7 +340,7 @@ public Object getRowValue(int rowNum) pair.lhs.fieldName, pair.lhs.fieldIndex ).getColumn(); - return StructuredDataBuilder.Element.of(pair.rhs, column.lookupObject(rowNum)); + return StructuredDataBuilder.Element.of(pair.rhs, column.lookupObject(column.getSingleValueRow(rowNum))); }) .collect(Collectors.toList()); return new StructuredDataBuilder(elements).build(); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 39109f5ed049..9003afaf1211 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -170,11 +170,15 @@ public static Collection constructorFeeder() .setLongColumnCompression(CompressionStrategy.LZF) .setDoubleColumnCompression(CompressionStrategy.LZF) .build(); + + NestedCommonFormatColumnFormatSpec noRawStorage = + NestedCommonFormatColumnFormatSpec.builder().setObjectStorageEncoding(ObjectStorageEncoding.NONE).build(); final List constructors = ImmutableList.of( new Object[]{defaultSpec}, new Object[]{frontCodedKeysAndDicts}, new Object[]{zstdRaw}, - new Object[]{lzf} + new Object[]{lzf}, + new Object[]{noRawStorage} ); return constructors; @@ -455,11 +459,22 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException Assert.assertEquals(ImmutableList.of(nullishPath, vPath, xPath, yPath, zPath), column.getNestedFields()); for (int i = 0; i < DATA.size(); i++) { - Map row = DATA.get(i); + final Map row; + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + // if raw object is not stored, the derived object will have sorted key and no nulls + row = new TreeMap<>(DATA.get(i)); + row.entrySet().removeIf(entry -> entry.getValue() == null); + } else { + row = DATA.get(i); + } Assert.assertEquals( JSON_MAPPER.writeValueAsString(row), JSON_MAPPER.writeValueAsString(StructuredData.unwrap(rawSelector.getObject())) ); + Assert.assertEquals( + JSON_MAPPER.writeValueAsString(row), + JSON_MAPPER.writeValueAsString(StructuredData.unwrap(column.getRowValue(i))) + ); testPath(row, i, "v", vSelector, vDimSelector, vValueIndex, vPredicateIndex, vNulls, null); testPath(row, i, "x", xSelector, xDimSelector, xValueIndex, xPredicateIndex, xNulls, ColumnType.LONG); From 48c0f2c717fd42e50189db3fe46091a36394a391 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 7 Oct 2025 16:27:39 -0700 Subject: [PATCH 15/19] nested --- .../nested/NestedDataColumnSupplierTest.java | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 9003afaf1211..9338a8415bac 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -609,11 +609,22 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException int rowCounter = 0; while (offset.withinBounds()) { - Map row = ARRAY_TEST_DATA.get(rowCounter); + final Map row; + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + // if raw object is not stored, the derived object will have sorted key and no nulls + row = new TreeMap<>(ARRAY_TEST_DATA.get(rowCounter)); + row.entrySet().removeIf(entry -> entry.getValue() == null); + } else { + row = ARRAY_TEST_DATA.get(rowCounter); + } Assert.assertEquals( JSON_MAPPER.writeValueAsString(row), JSON_MAPPER.writeValueAsString(StructuredData.unwrap(rawSelector.getObject())) ); + Assert.assertEquals( + JSON_MAPPER.writeValueAsString(row), + JSON_MAPPER.writeValueAsString(StructuredData.unwrap(column.getRowValue(rowCounter))) + ); Object[] s = (Object[]) row.get("s"); Object[] l = (Object[]) row.get("l"); @@ -665,7 +676,14 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException for (int i = 0; i < vectorOffset.getCurrentVectorSize(); i++, rowCounter++) { - Map row = ARRAY_TEST_DATA.get(rowCounter); + final Map row; + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + // if raw object is not stored, the derived object will have sorted key and no nulls + row = new TreeMap<>(ARRAY_TEST_DATA.get(rowCounter)); + row.entrySet().removeIf(entry -> entry.getValue() == null); + } else { + row = ARRAY_TEST_DATA.get(rowCounter); + } Assert.assertEquals( JSON_MAPPER.writeValueAsString(row), JSON_MAPPER.writeValueAsString(StructuredData.unwrap(rawVector[i])) @@ -714,7 +732,14 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException final boolean[] dElementNulls = dElementFilteredVectorSelector.getNullVector(); for (int i = 0; i < bitmapVectorOffset.getCurrentVectorSize(); i++, rowCounter += 2) { - Map row = ARRAY_TEST_DATA.get(rowCounter); + final Map row; + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + // if raw object is not stored, the derived object will have sorted key and no nulls + row = new TreeMap<>(ARRAY_TEST_DATA.get(rowCounter)); + row.entrySet().removeIf(entry -> entry.getValue() == null); + } else { + row = ARRAY_TEST_DATA.get(rowCounter); + } Assert.assertEquals( JSON_MAPPER.writeValueAsString(row), JSON_MAPPER.writeValueAsString(StructuredData.unwrap(rawVector[i])) From 7cfff4f005014031b0676d721cd7d7fdbbdd63e7 Mon Sep 17 00:00:00 2001 From: cecemei Date: Tue, 7 Oct 2025 17:15:54 -0700 Subject: [PATCH 16/19] format --- .../CompressedNestedDataComplexColumn.java | 205 +++++++++--------- 1 file changed, 103 insertions(+), 102 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 3d671daaaccf..906a2770b1b9 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -826,130 +826,132 @@ public VectorValueSelector makeVectorValueSelector( ReadableVectorOffset readableOffset ) { - final BaseColumnHolder columnHolder = getColumnHolder(path); - if (columnHolder != null) { - return columnHolder.getColumn().makeVectorValueSelector(readableOffset); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final BaseColumnHolder arrayColumnHolder = getColumnHolder(path.subList(0, path.size() - 1)); - if (arrayColumnHolder != null) { - final int elementNumber = ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex(); - if (elementNumber < 0) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build( - "Cannot make array element selector for path [%s], negative array index not supported for this selector", - path - ); - } - VectorObjectSelector arraySelector = arrayColumnHolder.getColumn().makeVectorObjectSelector(readableOffset); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getColumn() + .makeVectorValueSelector(readableOffset); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final int elementNumber = arrayField.elementNumber; + if (elementNumber < 0) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); + } + VectorObjectSelector arraySelector = getColumnHolder( + arrayField.nestedField.fieldName, + arrayField.nestedField.fieldIndex + ).getColumn().makeVectorObjectSelector(readableOffset); - return new VectorValueSelector() + return new VectorValueSelector() + { + private final long[] longs = new long[readableOffset.getMaxVectorSize()]; + private final double[] doubles = new double[readableOffset.getMaxVectorSize()]; + private final float[] floats = new float[readableOffset.getMaxVectorSize()]; + private final boolean[] nulls = new boolean[readableOffset.getMaxVectorSize()]; + private int id = ReadableVectorInspector.NULL_ID; + + private void computeNumbers() { - private final long[] longs = new long[readableOffset.getMaxVectorSize()]; - private final double[] doubles = new double[readableOffset.getMaxVectorSize()]; - private final float[] floats = new float[readableOffset.getMaxVectorSize()]; - private final boolean[] nulls = new boolean[readableOffset.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - private void computeNumbers() - { - if (readableOffset.getId() != id) { - final Object[] maybeArrays = arraySelector.getObjectVector(); - for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { - Object maybeArray = maybeArrays[i]; - if (maybeArray instanceof Object[]) { - Object[] anArray = (Object[]) maybeArray; - if (elementNumber < anArray.length) { - if (anArray[elementNumber] instanceof Number) { - Number n = (Number) anArray[elementNumber]; - longs[i] = n.longValue(); - doubles[i] = n.doubleValue(); - floats[i] = n.floatValue(); + if (readableOffset.getId() != id) { + final Object[] maybeArrays = arraySelector.getObjectVector(); + for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { + Object maybeArray = maybeArrays[i]; + if (maybeArray instanceof Object[]) { + Object[] anArray = (Object[]) maybeArray; + if (elementNumber < anArray.length) { + if (anArray[elementNumber] instanceof Number) { + Number n = (Number) anArray[elementNumber]; + longs[i] = n.longValue(); + doubles[i] = n.doubleValue(); + floats[i] = n.floatValue(); + nulls[i] = false; + } else { + Double d = anArray[elementNumber] instanceof String + ? Doubles.tryParse((String) anArray[elementNumber]) + : null; + if (d != null) { + longs[i] = d.longValue(); + doubles[i] = d; + floats[i] = d.floatValue(); nulls[i] = false; } else { - Double d = anArray[elementNumber] instanceof String - ? Doubles.tryParse((String) anArray[elementNumber]) - : null; - if (d != null) { - longs[i] = d.longValue(); - doubles[i] = d; - floats[i] = d.floatValue(); - nulls[i] = false; - } else { - nullElement(i); - } + nullElement(i); } - } else { - nullElement(i); } } else { - // not an array? nullElement(i); } + } else { + // not an array? + nullElement(i); } - id = readableOffset.getId(); } + id = readableOffset.getId(); } + } - private void nullElement(int i) - { - longs[i] = 0L; - doubles[i] = 0L; - floats[i] = 0L; - nulls[i] = true; - } + private void nullElement(int i) + { + longs[i] = 0L; + doubles[i] = 0L; + floats[i] = 0L; + nulls[i] = true; + } - @Override - public long[] getLongVector() - { - if (readableOffset.getId() != id) { - computeNumbers(); - } - return longs; + @Override + public long[] getLongVector() + { + if (readableOffset.getId() != id) { + computeNumbers(); } + return longs; + } - @Override - public float[] getFloatVector() - { - if (readableOffset.getId() != id) { - computeNumbers(); - } - return floats; + @Override + public float[] getFloatVector() + { + if (readableOffset.getId() != id) { + computeNumbers(); } + return floats; + } - @Override - public double[] getDoubleVector() - { - if (readableOffset.getId() != id) { - computeNumbers(); - } - return doubles; + @Override + public double[] getDoubleVector() + { + if (readableOffset.getId() != id) { + computeNumbers(); } + return doubles; + } - @Nullable - @Override - public boolean[] getNullVector() - { - if (readableOffset.getId() != id) { - computeNumbers(); - } - return nulls; + @Nullable + @Override + public boolean[] getNullVector() + { + if (readableOffset.getId() != id) { + computeNumbers(); } + return nulls; + } - @Override - public int getMaxVectorSize() - { - return arraySelector.getMaxVectorSize(); - } + @Override + public int getMaxVectorSize() + { + return arraySelector.getMaxVectorSize(); + } - @Override - public int getCurrentVectorSize() - { - return arraySelector.getCurrentVectorSize(); - } - }; - } + @Override + public int getCurrentVectorSize() + { + return arraySelector.getCurrentVectorSize(); + } + }; } return NilVectorSelector.create(readableOffset); } @@ -1269,7 +1271,6 @@ private List>> getAllParsedNestedFields() return allFields; } - /** * Returns a representation of a field or array element within a nested object structure, given a path. *

From f97a5c81d1d41e8eac9d666241b3e69c51f7c938 Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 9 Oct 2025 12:53:57 -0700 Subject: [PATCH 17/19] test --- .../calcite/CalciteNestedDataQueryTest.java | 117 ++++++++++++------ 1 file changed, 78 insertions(+), 39 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 294815af5e8a..5eabf333e044 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -81,7 +81,9 @@ import org.junit.Assert; import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.mockito.Mockito; import java.util.Arrays; import java.util.Collections; @@ -91,7 +93,7 @@ import static org.hamcrest.MatcherAssert.assertThat; @SqlTestFrameworkConfig.ComponentSupplier(NestedComponentSupplier.class) -public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest +public abstract class CalciteNestedDataQueryTest extends BaseCalciteQueryTest { public static final String DATA_SOURCE = "nested"; public static final String DATA_SOURCE_MIXED = "nested_mix"; @@ -152,41 +154,78 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ); - private static final NestedCommonFormatColumnFormatSpec NONE_OBJECT_STORAGE = - NestedCommonFormatColumnFormatSpec.builder().setObjectStorageEncoding(ObjectStorageEncoding.NONE).build(); - - public static final InputRowSchema ALL_JSON_COLUMNS = new InputRowSchema( - new TimestampSpec("t", "iso", null), - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(new AutoTypeColumnSchema("string", null, NONE_OBJECT_STORAGE)) - .add(new AutoTypeColumnSchema("nest", null, NONE_OBJECT_STORAGE)) - .add(new AutoTypeColumnSchema("nester", null, NONE_OBJECT_STORAGE)) - .add(new AutoTypeColumnSchema("long", null, NONE_OBJECT_STORAGE)) - .add(new AutoTypeColumnSchema("string_sparse", null, NONE_OBJECT_STORAGE)) - .build() - ).build(), - null - ); + @Nested + public static class DefaultCalciteNestedDataQueryTest extends CalciteNestedDataQueryTest + { + } - public static final InputRowSchema JSON_AND_SCALAR_MIX = new InputRowSchema( - new TimestampSpec("t", "iso", null), - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(new StringDimensionSchema("string")) - .add(new AutoTypeColumnSchema("nest", null, NONE_OBJECT_STORAGE)) - .add(new AutoTypeColumnSchema("nester", null, NONE_OBJECT_STORAGE)) - .add(new LongDimensionSchema("long")) - .add(new StringDimensionSchema("string_sparse")) - .build() - ).build(), - null - ); - public static final List ROWS = - RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, ALL_JSON_COLUMNS)).collect(Collectors.toList()); + @Nested + public static class NoneObjectStorageCalciteNestedDataQueryTest extends CalciteNestedDataQueryTest + { + public NoneObjectStorageCalciteNestedDataQueryTest() + { + super(); + // Override with none object storage + NestedCommonFormatColumnFormatSpec noneObjectStorage = + NestedCommonFormatColumnFormatSpec.builder().setObjectStorageEncoding(ObjectStorageEncoding.NONE).build(); + Mockito.when(ALL_JSON_COLUMNS.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new AutoTypeColumnSchema("string", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("nest", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("nester", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("long", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("string_sparse", null, noneObjectStorage)) + .build() + ).build()); + Mockito.when(JSON_AND_SCALAR_MIX.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new StringDimensionSchema("string")) + .add(new AutoTypeColumnSchema("nest", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("nester", null, noneObjectStorage)) + .add(new LongDimensionSchema("long")) + .add(new StringDimensionSchema("string_sparse")) + .build() + ).build()); + } + } - public static final List ROWS_MIX = - RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, JSON_AND_SCALAR_MIX)).collect(Collectors.toList()); + public static final InputRowSchema ALL_JSON_COLUMNS = Mockito.mock(InputRowSchema.class); + + public static final InputRowSchema JSON_AND_SCALAR_MIX = Mockito.mock(InputRowSchema.class); + + public CalciteNestedDataQueryTest() + { + Mockito.when(ALL_JSON_COLUMNS.getTimestampSpec()).thenReturn( + new TimestampSpec("t", "iso", null)); + Mockito.when(ALL_JSON_COLUMNS.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(AutoTypeColumnSchema.of("string")) + .add(AutoTypeColumnSchema.of("nest")) + .add(AutoTypeColumnSchema.of("nester")) + .add(AutoTypeColumnSchema.of("long")) + .add(AutoTypeColumnSchema.of("string_sparse")) + .build() + ).build()); + Mockito.when(JSON_AND_SCALAR_MIX.getTimestampSpec()).thenReturn(new TimestampSpec("t", "iso", null)); + Mockito.when(JSON_AND_SCALAR_MIX.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new StringDimensionSchema("string")) + .add(AutoTypeColumnSchema.of("nest")) + .add(AutoTypeColumnSchema.of("nester")) + .add(new LongDimensionSchema("long")) + .add(new StringDimensionSchema("string_sparse")) + .build() + ).build()); + } + + public static List constructInputRows(InputRowSchema inputRowSchema) + { + return RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, inputRowSchema)).collect(Collectors.toList()); + } public static class NestedComponentSupplier extends StandardComponentSupplier { @@ -212,7 +251,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS) + .rows(constructInputRows(ALL_JSON_COLUMNS)) .buildMMappedIndex(); final QueryableIndex indexMix11 = @@ -228,7 +267,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS) + .rows(constructInputRows(ALL_JSON_COLUMNS)) .buildMMappedIndex(); @@ -245,7 +284,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS_MIX) + .rows(constructInputRows(JSON_AND_SCALAR_MIX)) .buildMMappedIndex(); final QueryableIndex indexMix21 = @@ -261,7 +300,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS_MIX) + .rows(constructInputRows(JSON_AND_SCALAR_MIX)) .buildMMappedIndex(); final QueryableIndex indexMix22 = @@ -277,7 +316,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS) + .rows(constructInputRows(ALL_JSON_COLUMNS)) .buildMMappedIndex(); final QueryableIndex indexArrays = From 1b8c5e47eed7cc3567e4cb2738b655ca04c85825 Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 9 Oct 2025 14:18:28 -0700 Subject: [PATCH 18/19] trigger ci / empty commit From 7d83b04e1bd73aea9b5682da21cbae454a6d0dde Mon Sep 17 00:00:00 2001 From: cecemei Date: Thu, 9 Oct 2025 15:23:34 -0700 Subject: [PATCH 19/19] static --- .../calcite/CalciteNestedDataQueryTest.java | 125 ++++++++++++------ 1 file changed, 83 insertions(+), 42 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 5eabf333e044..1325dc933731 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -195,33 +195,6 @@ public NoneObjectStorageCalciteNestedDataQueryTest() public static final InputRowSchema JSON_AND_SCALAR_MIX = Mockito.mock(InputRowSchema.class); - public CalciteNestedDataQueryTest() - { - Mockito.when(ALL_JSON_COLUMNS.getTimestampSpec()).thenReturn( - new TimestampSpec("t", "iso", null)); - Mockito.when(ALL_JSON_COLUMNS.getDimensionsSpec()).thenReturn( - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(AutoTypeColumnSchema.of("string")) - .add(AutoTypeColumnSchema.of("nest")) - .add(AutoTypeColumnSchema.of("nester")) - .add(AutoTypeColumnSchema.of("long")) - .add(AutoTypeColumnSchema.of("string_sparse")) - .build() - ).build()); - Mockito.when(JSON_AND_SCALAR_MIX.getTimestampSpec()).thenReturn(new TimestampSpec("t", "iso", null)); - Mockito.when(JSON_AND_SCALAR_MIX.getDimensionsSpec()).thenReturn( - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(new StringDimensionSchema("string")) - .add(AutoTypeColumnSchema.of("nest")) - .add(AutoTypeColumnSchema.of("nester")) - .add(new LongDimensionSchema("long")) - .add(new StringDimensionSchema("string_sparse")) - .build() - ).build()); - } - public static List constructInputRows(InputRowSchema inputRowSchema) { return RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, inputRowSchema)).collect(Collectors.toList()); @@ -229,6 +202,32 @@ public static List constructInputRows(InputRowSchema inputRowSchema) public static class NestedComponentSupplier extends StandardComponentSupplier { + static { + Mockito.when(ALL_JSON_COLUMNS.getTimestampSpec()).thenReturn( + new TimestampSpec("t", "iso", null)); + Mockito.when(ALL_JSON_COLUMNS.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(AutoTypeColumnSchema.of("string")) + .add(AutoTypeColumnSchema.of("nest")) + .add(AutoTypeColumnSchema.of("nester")) + .add(AutoTypeColumnSchema.of("long")) + .add(AutoTypeColumnSchema.of("string_sparse")) + .build() + ).build()); + Mockito.when(JSON_AND_SCALAR_MIX.getTimestampSpec()).thenReturn(new TimestampSpec("t", "iso", null)); + Mockito.when(JSON_AND_SCALAR_MIX.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new StringDimensionSchema("string")) + .add(AutoTypeColumnSchema.of("nest")) + .add(AutoTypeColumnSchema.of("nester")) + .add(new LongDimensionSchema("long")) + .add(new StringDimensionSchema("string_sparse")) + .build() + ).build()); + } + public NestedComponentSupplier(TempDirProducer tempFolderProducer) { super(tempFolderProducer); @@ -6049,7 +6048,44 @@ public void testScanAllTypesAuto() "cObjectArray", "cnt" ) - .columnTypes(ColumnType.LONG, ColumnType.STRING, ColumnType.LONG, ColumnType.DOUBLE, ColumnType.LONG, ColumnType.STRING, ColumnType.DOUBLE, ColumnType.ofComplex("json"), ColumnType.LONG_ARRAY, ColumnType.STRING_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.STRING_ARRAY, ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.STRING, ColumnType.STRING, ColumnType.LONG, ColumnType.DOUBLE, ColumnType.ofComplex("json"), ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.LONG) + .columnTypes( + ColumnType.LONG, + ColumnType.STRING, + ColumnType.LONG, + ColumnType.DOUBLE, + ColumnType.LONG, + ColumnType.STRING, + ColumnType.DOUBLE, + ColumnType.ofComplex("json"), + ColumnType.LONG_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.STRING_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.STRING, + ColumnType.STRING, + ColumnType.LONG, + ColumnType.DOUBLE, + ColumnType.ofComplex("json"), + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.LONG + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ), @@ -6431,20 +6467,25 @@ public void testCoalesceOnNestedColumns() .expectedQueries( ImmutableList.of( Druids.newScanQueryBuilder() - .dataSource( - UnnestDataSource.create( - new TableDataSource(DATA_SOURCE_ALL), - new NestedFieldVirtualColumn("arrayNestedLong", "$[1]", "j0.unnest", ColumnType.LONG_ARRAY), - null - ) - ) - .virtualColumns(expressionVirtualColumn("v0", "nvl(\"j0.unnest\",\"long\")", ColumnType.LONG)) - .intervals(querySegmentSpec(Filtration.eternity())) - .columns("j0.unnest", "long", "v0") - .columnTypes(ColumnType.LONG, ColumnType.LONG, ColumnType.LONG) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .context(QUERY_CONTEXT_DEFAULT) - .build() + .dataSource( + UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ALL), + new NestedFieldVirtualColumn( + "arrayNestedLong", + "$[1]", + "j0.unnest", + ColumnType.LONG_ARRAY + ), + null + ) + ) + .virtualColumns(expressionVirtualColumn("v0", "nvl(\"j0.unnest\",\"long\")", ColumnType.LONG)) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("j0.unnest", "long", "v0") + .columnTypes(ColumnType.LONG, ColumnType.LONG, ColumnType.LONG) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() ) ) .expectedResults(