diff --git a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java index 5e299f6aa98f..fe6b117653b2 100644 --- a/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java +++ b/processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java @@ -19,14 +19,13 @@ package org.apache.druid.query.rowsandcols.concrete; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.query.rowsandcols.column.Column; import org.apache.druid.query.rowsandcols.column.ColumnAccessor; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BaseColumnHolder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.data.AtomicIntegerReadableOffset; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -161,26 +160,4 @@ public BaseColumn getBaseColumn() } return baseColumn; } - - private static class AtomicIntegerReadableOffset implements ReadableOffset - { - private final AtomicInteger offset; - - public AtomicIntegerReadableOffset(AtomicInteger offset) - { - this.offset = offset; - } - - @Override - public int getOffset() - { - return offset.get(); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - - } - } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/AtomicIntegerReadableOffset.java b/processing/src/main/java/org/apache/druid/segment/data/AtomicIntegerReadableOffset.java new file mode 100644 index 000000000000..e063c6b0e21b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/data/AtomicIntegerReadableOffset.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.data; + +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; + +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A {@link ReadableOffset} implementation that wraps an AtomicInteger. 
+ */ +public class AtomicIntegerReadableOffset implements ReadableOffset +{ + private final AtomicInteger offset; + + public AtomicIntegerReadableOffset(AtomicInteger offset) + { + this.offset = offset; + } + + @Override + public int getOffset() + { + return offset.get(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 4777dee66daa..906a2770b1b9 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -19,9 +19,9 @@ package org.apache.druid.segment.nested; -import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import com.google.common.primitives.Doubles; import org.apache.druid.collections.bitmap.ImmutableBitmap; @@ -29,6 +29,7 @@ import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.Closer; @@ -41,17 +42,16 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.NilColumnValueSelector; import org.apache.druid.segment.ObjectColumnSelector; -import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BaseColumnHolder; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; -import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringEncodingStrategies; import org.apache.druid.segment.column.TypeStrategies; import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.data.AtomicIntegerReadableOffset; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ColumnarDoubles; import org.apache.druid.segment.data.ColumnarInts; @@ -93,7 +93,9 @@ import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; +import java.util.stream.Collectors; /** * Implementation of {@link NestedDataComplexColumn} which uses a {@link CompressedVariableSizedBlobColumn} for the @@ -117,6 +119,7 @@ public abstract class CompressedNestedDataComplexColumn fieldsSupplier; @@ -139,7 +142,7 @@ public CompressedNestedDataComplexColumn( String columnName, ColumnType logicalType, @SuppressWarnings("unused") ColumnConfig columnConfig, - CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + @Nullable CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, ImmutableBitmap nullValues, Supplier fieldsSupplier, FieldTypeInfo fieldInfo, @@ -180,12 +183,10 @@ public CompressedNestedDataComplexColumn( @Override public SortedMap getFieldTypeInfo() { - final 
TKeyDictionary fields = fieldsSupplier.get(); - final SortedMap fieldMap = new TreeMap<>(); - for (int i = 0; i < fields.size(); i++) { - String fieldPath = StringUtils.fromUtf8(fields.get(i)); - FieldTypeInfo.TypeSet types = fieldInfo.getTypes(i); - fieldMap.put(fieldPath, new FieldTypeInfo.MutableTypeSet(types.getByteValue())); + SortedMap fieldMap = new TreeMap<>(); + for (NestedField field : getAllNestedFields()) { + FieldTypeInfo.TypeSet types = fieldInfo.getTypes(field.fieldIndex); + fieldMap.put(field.fieldName, new FieldTypeInfo.MutableTypeSet(types.getByteValue())); } return fieldMap; } @@ -199,15 +200,9 @@ public ColumnType getLogicalType() @Override public List> getNestedFields() { - final TKeyDictionary fields = fieldsSupplier.get(); - final List> fieldParts = new ArrayList<>(fields.size()); - for (int i = 0; i < fields.size(); i++) { - fieldParts.add(parsePath(StringUtils.fromUtf8(fields.get(i)))); - } - return fieldParts; + return getAllParsedNestedFields().stream().map(pair -> pair.rhs).collect(Collectors.toList()); } - public TStringDictionary getUtf8BytesDictionary() { return stringDictionarySupplier.get(); @@ -329,29 +324,69 @@ public Object getRowValue(int rowNum) return null; } - if (compressedRawColumn == null) { + if (compressedRawColumn == null && compressedRawColumnSupplier != null) { compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } - final ByteBuffer valueBuffer = compressedRawColumn.get(rowNum); - return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + if (compressedRawColumnSupplier != null) { + final ByteBuffer valueBuffer = compressedRawColumn.get(rowNum); + return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + } + + final List elements = getAllParsedNestedFields() + .stream() + .map(pair -> { + NestedFieldDictionaryEncodedColumn column = (NestedFieldDictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn(); + return StructuredDataBuilder.Element.of(pair.rhs, column.lookupObject(column.getSingleValueRow(rowNum))); + }) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); } @Override public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) { - final TKeyDictionary fields = fieldsSupplier.get(); - if (!logicalType.equals(ColumnType.NESTED_DATA) && fields.size() == 1 && rootFieldPath.equals(StringUtils.fromUtf8(fields.get(0)))) { + List allFields = getAllNestedFields(); + if (!logicalType.equals(ColumnType.NESTED_DATA) + && allFields.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(allFields).fieldName)) { return makeColumnValueSelector( ImmutableList.of(), null /* not used */, offset ); } - if (compressedRawColumn == null) { - compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + final Supplier valueProvider; + if (compressedRawColumnSupplier != null) { + if (compressedRawColumn == null) { + compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + } + valueProvider = () -> { + final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); + return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + }; + } else { + List, ColumnValueSelector>> fieldSelectors = + getAllParsedNestedFields().stream() + .map(pair -> Pair.of( + pair.rhs, + ((DictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn()).makeColumnValueSelector(offset) + )) + .collect(Collectors.toList()); + valueProvider = () -> { 
+ List elements = fieldSelectors + .stream() + .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); + }; } - return new ObjectColumnSelector() { @Nullable @@ -361,8 +396,7 @@ public Object getObject() if (nullValues.get(offset.getOffset())) { return null; } - final ByteBuffer valueBuffer = compressedRawColumn.get(offset.getOffset()); - return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + return valueProvider.get(); } @Override @@ -382,17 +416,48 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) { - final TKeyDictionary fields = fieldsSupplier.get(); - if (!logicalType.equals(ColumnType.NESTED_DATA) && fields.size() == 1 && rootFieldPath.equals(StringUtils.fromUtf8(fields.get(0)))) { + List>> allFields = getAllParsedNestedFields(); + if (!logicalType.equals(ColumnType.NESTED_DATA) + && allFields.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(allFields).lhs.fieldName)) { return makeVectorObjectSelector( Collections.emptyList(), null /* not used */, offset ); } - if (compressedRawColumn == null) { - compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + + AtomicInteger atomicOffset = new AtomicInteger(-1); + final Supplier valueProvider; + if (compressedRawColumnSupplier != null) { + if (compressedRawColumn == null) { + compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); + } + valueProvider = () -> { + final ByteBuffer valueBuffer = compressedRawColumn.get(atomicOffset.get()); + return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + }; + } else { + AtomicIntegerReadableOffset readableAtomicOffset = new AtomicIntegerReadableOffset(atomicOffset); + final List, ColumnValueSelector>> fieldSelectors = + allFields.stream() + .map(pair -> Pair.of( + pair.rhs, + ((DictionaryEncodedColumn) getColumnHolder( + pair.lhs.fieldName, + pair.lhs.fieldIndex + ).getColumn()).makeColumnValueSelector(readableAtomicOffset) + )) + .collect(Collectors.toList()); + valueProvider = () -> { + List elements = fieldSelectors + .stream() + .map(c -> StructuredDataBuilder.Element.of(c.lhs, c.rhs.getObject())) + .collect(Collectors.toList()); + return new StructuredDataBuilder(elements).build(); + }; } + return new VectorObjectSelector() { final Object[] vector = new Object[offset.getMaxVectorSize()]; @@ -434,8 +499,8 @@ private Object getForOffset(int offset) // maybe someday can use bitmap batch operations for nulls? 
return null; } - final ByteBuffer valueBuffer = compressedRawColumn.get(offset); - return STRATEGY.fromByteBuffer(valueBuffer, valueBuffer.remaining()); + atomicOffset.set(offset); + return valueProvider.get(); } @Override @@ -455,8 +520,10 @@ public int getMaxVectorSize() @Override public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) { - final TKeyDictionary fields = fieldsSupplier.get(); - if (!logicalType.equals(ColumnType.NESTED_DATA) && fields.size() == 1 && rootFieldPath.equals(StringUtils.fromUtf8(fields.get(0)))) { + List allFields = getAllNestedFields(); + if (!logicalType.equals(ColumnType.NESTED_DATA) + && allFields.size() == 1 + && rootFieldPath.equals(Iterables.getOnlyElement(allFields).fieldName)) { return makeVectorValueSelector( Collections.emptyList(), null /* not used */, @@ -469,10 +536,10 @@ public VectorValueSelector makeVectorValueSelector(ReadableVectorOffset offset) @Override public int getLength() { - if (compressedRawColumn == null) { + if (compressedRawColumn == null && compressedRawColumnSupplier != null) { compressedRawColumn = closer.register(compressedRawColumnSupplier.get()); } - return compressedRawColumn.size(); + return compressedRawColumnSupplier != null ? compressedRawColumn.size() : -1; } @Override @@ -481,6 +548,7 @@ public void close() CloseableUtils.closeAndWrapExceptions(closer); } + /** * Create a selector for a nested path. * @@ -496,52 +564,54 @@ public DimensionSelector makeDimensionSelector( ReadableOffset readableOffset ) { - final TKeyDictionary fields = fieldsSupplier.get(); - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - if (fieldIndex >= 0) { - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(field, fieldIndex).getColumn(); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + ((NestedField) field).fieldName, + ((NestedField) field).fieldIndex + ).getColumn(); return col.makeDimensionSelector(readableOffset, extractionFn); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final NestedPathPart lastPath = path.get(path.size() - 1); - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(arrayField)); - if (arrayFieldIndex >= 0) { - final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); - if (elementNumber < 0) { - throw new IAE("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); - } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(arrayField, arrayFieldIndex).getColumn(); - ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); - return new BaseSingleValueDimensionSelector() + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final int elementNumber = arrayField.elementNumber; + if (elementNumber < 0) { + throw new IAE( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); + } + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + arrayField.nestedField.fieldName, + arrayField.nestedField.fieldIndex + ).getColumn(); + 
ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); + return new BaseSingleValueDimensionSelector() + { + @Nullable + @Override + protected String getValue() { - @Nullable - @Override - protected String getValue() - { - Object o = arraySelector.getObject(); - if (o instanceof Object[]) { - Object[] array = (Object[]) o; - if (elementNumber < array.length) { - Object element = array[elementNumber]; - if (element == null) { - return null; - } - return String.valueOf(element); + Object o = arraySelector.getObject(); + if (o instanceof Object[]) { + Object[] array = (Object[]) o; + if (elementNumber < array.length) { + Object element = array[elementNumber]; + if (element == null) { + return null; } + return String.valueOf(element); } - return null; } + return null; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + arraySelector.inspectRuntimeShape(inspector); + } + }; - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - arraySelector.inspectRuntimeShape(inspector); - } - }; - } } return DimensionSelector.constant(null); } @@ -560,88 +630,84 @@ public ColumnValueSelector makeColumnValueSelector( ReadableOffset readableOffset ) { - final TKeyDictionary fields = fieldsSupplier.get(); - final String field = getField(path); - - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - if (fieldIndex >= 0) { - BaseColumn col = getColumnHolder(field, fieldIndex).getColumn(); - return col.makeColumnValueSelector(readableOffset); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final NestedPathPart lastPath = path.get(path.size() - 1); - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(arrayField)); - if (arrayFieldIndex >= 0) { - final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); - if (elementNumber < 0) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); - } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField, - arrayFieldIndex - ).getColumn(); - ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); - return new ColumnValueSelector<>() + Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getColumn() + .makeColumnValueSelector(readableOffset); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final int elementNumber = arrayField.elementNumber; + if (elementNumber < 0) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); + } + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + arrayField.nestedField.fieldName, + arrayField.nestedField.fieldIndex + ).getColumn(); + ColumnValueSelector arraySelector = col.makeColumnValueSelector(readableOffset); + return new 
ColumnValueSelector<>() + { + @Override + public boolean isNull() { - @Override - public boolean isNull() - { - Object o = getObject(); - return !(o instanceof Number); - } + Object o = getObject(); + return !(o instanceof Number); + } - @Override - public long getLong() - { - Object o = getObject(); - return o instanceof Number ? ((Number) o).longValue() : 0L; - } + @Override + public long getLong() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).longValue() : 0L; + } - @Override - public float getFloat() - { - Object o = getObject(); - return o instanceof Number ? ((Number) o).floatValue() : 0f; - } + @Override + public float getFloat() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).floatValue() : 0f; + } - @Override - public double getDouble() - { - Object o = getObject(); - return o instanceof Number ? ((Number) o).doubleValue() : 0.0; - } + @Override + public double getDouble() + { + Object o = getObject(); + return o instanceof Number ? ((Number) o).doubleValue() : 0.0; + } - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - arraySelector.inspectRuntimeShape(inspector); - } + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + arraySelector.inspectRuntimeShape(inspector); + } - @Nullable - @Override - public Object getObject() - { - Object o = arraySelector.getObject(); - if (o instanceof Object[]) { - Object[] array = (Object[]) o; - if (elementNumber < array.length) { - return array[elementNumber]; - } + @Nullable + @Override + public Object getObject() + { + Object o = arraySelector.getObject(); + if (o instanceof Object[]) { + Object[] array = (Object[]) o; + if (elementNumber < array.length) { + return array[elementNumber]; } - return null; } + return null; + } + + @Override + public Class classOfObject() + { + return Object.class; + } + }; - @Override - public Class classOfObject() - { - return Object.class; - } - }; - } } return NilColumnValueSelector.instance(); } @@ -653,12 +719,13 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector ReadableVectorOffset readableOffset ) { - final TKeyDictionary fields = fieldsSupplier.get(); - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - if (fieldIndex >= 0) { - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder(field, fieldIndex).getColumn(); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + NestedField nestedField = (NestedField) field; + DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( + nestedField.fieldName, + nestedField.fieldIndex + ).getColumn(); return col.makeSingleValueDimensionVectorSelector(readableOffset); } else { return NilVectorSelector.create(readableOffset); @@ -679,74 +746,69 @@ public VectorObjectSelector makeVectorObjectSelector( ReadableVectorOffset readableOffset ) { - final TKeyDictionary fields = fieldsSupplier.get(); - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - if (fieldIndex >= 0) { - BaseColumn col = getColumnHolder(field, fieldIndex).getColumn(); - return col.makeVectorObjectSelector(readableOffset); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final NestedPathPart 
lastPath = path.get(path.size() - 1); - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(arrayField)); - if (arrayFieldIndex >= 0) { - final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); - if (elementNumber < 0) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); - } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField, - arrayFieldIndex - ).getColumn(); - VectorObjectSelector arraySelector = col.makeVectorObjectSelector(readableOffset); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getColumn() + .makeVectorObjectSelector(readableOffset); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final int elementNumber = arrayField.elementNumber; + if (elementNumber < 0) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); + } + VectorObjectSelector arraySelector = getColumnHolder( + arrayField.nestedField.fieldName, + arrayField.nestedField.fieldIndex + ).getColumn().makeVectorObjectSelector(readableOffset); + return new VectorObjectSelector() + { + private final Object[] elements = new Object[arraySelector.getMaxVectorSize()]; + private int id = ReadableVectorInspector.NULL_ID; - return new VectorObjectSelector() + @Override + public Object[] getObjectVector() { - private final Object[] elements = new Object[arraySelector.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - @Override - public Object[] getObjectVector() - { - if (readableOffset.getId() != id) { - final Object[] delegate = arraySelector.getObjectVector(); - for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { - Object maybeArray = delegate[i]; - if (maybeArray instanceof Object[]) { - Object[] anArray = (Object[]) maybeArray; - if (elementNumber < anArray.length) { - final Object element = anArray[elementNumber]; - elements[i] = element; - } else { - elements[i] = null; - } + if (readableOffset.getId() != id) { + final Object[] delegate = arraySelector.getObjectVector(); + for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { + Object maybeArray = delegate[i]; + if (maybeArray instanceof Object[]) { + Object[] anArray = (Object[]) maybeArray; + if (elementNumber < anArray.length) { + final Object element = anArray[elementNumber]; + elements[i] = element; } else { elements[i] = null; } + } else { + elements[i] = null; } - id = readableOffset.getId(); } - return elements; + id = readableOffset.getId(); } + return elements; + } - @Override - public int getMaxVectorSize() - { - return arraySelector.getMaxVectorSize(); - } + @Override + public int getMaxVectorSize() + { + return arraySelector.getMaxVectorSize(); + } - @Override - public int getCurrentVectorSize() - { - return arraySelector.getCurrentVectorSize(); - } - }; - } + @Override + public int getCurrentVectorSize() + { + return 
arraySelector.getCurrentVectorSize(); + } + }; } + return NilVectorSelector.create(readableOffset); } @@ -764,137 +826,132 @@ public VectorValueSelector makeVectorValueSelector( ReadableVectorOffset readableOffset ) { - final TKeyDictionary fields = fieldsSupplier.get(); - final String field = getField(path); - Preconditions.checkNotNull(field, "Null field"); - final int fieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - if (fieldIndex >= 0) { - BaseColumn col = getColumnHolder(field, fieldIndex).getColumn(); - return col.makeVectorValueSelector(readableOffset); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final NestedPathPart lastPath = path.get(path.size() - 1); - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(arrayField)); - if (arrayFieldIndex >= 0) { - final int elementNumber = ((NestedPathArrayElement) lastPath).getIndex(); - if (elementNumber < 0) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Cannot make array element selector for path [%s], negative array index not supported for this selector", path); - } - DictionaryEncodedColumn col = (DictionaryEncodedColumn) getColumnHolder( - arrayField, - arrayFieldIndex - ).getColumn(); - VectorObjectSelector arraySelector = col.makeVectorObjectSelector(readableOffset); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getColumn() + .makeVectorValueSelector(readableOffset); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final int elementNumber = arrayField.elementNumber; + if (elementNumber < 0) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Cannot make array element selector for path [%s], negative array index not supported for this selector", + path + ); + } + VectorObjectSelector arraySelector = getColumnHolder( + arrayField.nestedField.fieldName, + arrayField.nestedField.fieldIndex + ).getColumn().makeVectorObjectSelector(readableOffset); + + return new VectorValueSelector() + { + private final long[] longs = new long[readableOffset.getMaxVectorSize()]; + private final double[] doubles = new double[readableOffset.getMaxVectorSize()]; + private final float[] floats = new float[readableOffset.getMaxVectorSize()]; + private final boolean[] nulls = new boolean[readableOffset.getMaxVectorSize()]; + private int id = ReadableVectorInspector.NULL_ID; - return new VectorValueSelector() + private void computeNumbers() { - private final long[] longs = new long[readableOffset.getMaxVectorSize()]; - private final double[] doubles = new double[readableOffset.getMaxVectorSize()]; - private final float[] floats = new float[readableOffset.getMaxVectorSize()]; - private final boolean[] nulls = new boolean[readableOffset.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - private void computeNumbers() - { - if (readableOffset.getId() != id) { - final Object[] maybeArrays = arraySelector.getObjectVector(); - for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { - Object maybeArray = maybeArrays[i]; - if (maybeArray instanceof Object[]) { - Object[] 
anArray = (Object[]) maybeArray; - if (elementNumber < anArray.length) { - if (anArray[elementNumber] instanceof Number) { - Number n = (Number) anArray[elementNumber]; - longs[i] = n.longValue(); - doubles[i] = n.doubleValue(); - floats[i] = n.floatValue(); + if (readableOffset.getId() != id) { + final Object[] maybeArrays = arraySelector.getObjectVector(); + for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { + Object maybeArray = maybeArrays[i]; + if (maybeArray instanceof Object[]) { + Object[] anArray = (Object[]) maybeArray; + if (elementNumber < anArray.length) { + if (anArray[elementNumber] instanceof Number) { + Number n = (Number) anArray[elementNumber]; + longs[i] = n.longValue(); + doubles[i] = n.doubleValue(); + floats[i] = n.floatValue(); + nulls[i] = false; + } else { + Double d = anArray[elementNumber] instanceof String + ? Doubles.tryParse((String) anArray[elementNumber]) + : null; + if (d != null) { + longs[i] = d.longValue(); + doubles[i] = d; + floats[i] = d.floatValue(); nulls[i] = false; } else { - Double d = anArray[elementNumber] instanceof String - ? Doubles.tryParse((String) anArray[elementNumber]) - : null; - if (d != null) { - longs[i] = d.longValue(); - doubles[i] = d; - floats[i] = d.floatValue(); - nulls[i] = false; - } else { - nullElement(i); - } + nullElement(i); } - } else { - nullElement(i); } } else { - // not an array? nullElement(i); } + } else { + // not an array? + nullElement(i); } - id = readableOffset.getId(); } + id = readableOffset.getId(); } + } - private void nullElement(int i) - { - longs[i] = 0L; - doubles[i] = 0L; - floats[i] = 0L; - nulls[i] = true; - } + private void nullElement(int i) + { + longs[i] = 0L; + doubles[i] = 0L; + floats[i] = 0L; + nulls[i] = true; + } - @Override - public long[] getLongVector() - { - if (readableOffset.getId() != id) { - computeNumbers(); - } - return longs; + @Override + public long[] getLongVector() + { + if (readableOffset.getId() != id) { + computeNumbers(); } + return longs; + } - @Override - public float[] getFloatVector() - { - if (readableOffset.getId() != id) { - computeNumbers(); - } - return floats; + @Override + public float[] getFloatVector() + { + if (readableOffset.getId() != id) { + computeNumbers(); } + return floats; + } - @Override - public double[] getDoubleVector() - { - if (readableOffset.getId() != id) { - computeNumbers(); - } - return doubles; + @Override + public double[] getDoubleVector() + { + if (readableOffset.getId() != id) { + computeNumbers(); } + return doubles; + } - @Nullable - @Override - public boolean[] getNullVector() - { - if (readableOffset.getId() != id) { - computeNumbers(); - } - return nulls; + @Nullable + @Override + public boolean[] getNullVector() + { + if (readableOffset.getId() != id) { + computeNumbers(); } + return nulls; + } - @Override - public int getMaxVectorSize() - { - return arraySelector.getMaxVectorSize(); - } + @Override + public int getMaxVectorSize() + { + return arraySelector.getMaxVectorSize(); + } - @Override - public int getCurrentVectorSize() - { - return arraySelector.getCurrentVectorSize(); - } - }; - } + @Override + public int getCurrentVectorSize() + { + return arraySelector.getCurrentVectorSize(); + } + }; } return NilVectorSelector.create(readableOffset); } @@ -904,19 +961,13 @@ public int getCurrentVectorSize() @Override public Set getFieldTypes(List path) { - final TKeyDictionary fields = fieldsSupplier.get(); - String field = getField(path); - int index = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - // if 
index is negative, check for an array element accessor in the path - if (index < 0) { - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final String arrayField = getField(path.subList(0, path.size() - 1)); - index = fields.indexOf(StringUtils.toUtf8ByteBuffer(arrayField)); - } - if (index < 0) { - return null; - } - final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(index).getByteValue()); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + return FieldTypeInfo.convertToSet(fieldInfo.getTypes(((NestedField) field).fieldIndex).getByteValue()); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.nestedField.fieldIndex) + .getByteValue()); final Set elementTypes = Sets.newHashSetWithExpectedSize(arrayFieldTypes.size()); for (ColumnType type : arrayFieldTypes) { if (type.isArray()) { @@ -927,27 +978,24 @@ public Set getFieldTypes(List path) } return elementTypes; } - return FieldTypeInfo.convertToSet(fieldInfo.getTypes(index).getByteValue()); + return null; } @Nullable @Override public ColumnType getFieldLogicalType(List path) { - final String field = getField(path); - final Set fieldTypes; - int index = fieldsSupplier.get().indexOf(StringUtils.toUtf8ByteBuffer(field)); - ColumnType leastRestrictiveType = null; - if (index < 0) { - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final String arrayField = getField(path.subList(0, path.size() - 1)); - index = fieldsSupplier.get().indexOf(StringUtils.toUtf8ByteBuffer(arrayField)); - } - if (index < 0) { - return null; - } - fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(index).getByteValue()); - for (ColumnType type : fieldTypes) { + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final Set fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(((NestedField) field).fieldIndex) + .getByteValue()); + return ColumnType.leastRestrictiveType(fieldTypes); + } else if (field instanceof NestedArrayElement) { + final NestedArrayElement arrayField = (NestedArrayElement) field; + final Set arrayFieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(arrayField.nestedField.fieldIndex) + .getByteValue()); + ColumnType leastRestrictiveType = null; + for (ColumnType type : arrayFieldTypes) { if (type.isArray()) { leastRestrictiveType = ColumnType.leastRestrictiveType( leastRestrictiveType, @@ -957,39 +1005,34 @@ public ColumnType getFieldLogicalType(List path) leastRestrictiveType = ColumnType.leastRestrictiveType(leastRestrictiveType, type); } } - } else { - fieldTypes = FieldTypeInfo.convertToSet(fieldInfo.getTypes(index).getByteValue()); - leastRestrictiveType = ColumnType.leastRestrictiveType(fieldTypes); + return leastRestrictiveType; + } - return leastRestrictiveType; + return null; } @Nullable @Override - public ColumnHolder getColumnHolder(List path) + public BaseColumnHolder getColumnHolder(List path) { - final TKeyDictionary fields = fieldsSupplier.get(); - final String field = getField(path); - final int fieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - return getColumnHolder(field, fieldIndex); + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final NestedField nestedField = 
(NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex); + } + return null; } @Nullable @Override public ColumnIndexSupplier getColumnIndexSupplier(List path) { - final TKeyDictionary fields = fieldsSupplier.get(); - final String field = getField(path); - int fieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - if (fieldIndex >= 0) { - return getColumnHolder(field, fieldIndex).getIndexSupplier(); - } - if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { - final String arrayField = getField(path.subList(0, path.size() - 1)); - final int arrayFieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(arrayField)); - if (arrayFieldIndex >= 0) { - return NoIndexesColumnIndexSupplier.getInstance(); - } + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getIndexSupplier(); + } else if (field instanceof NestedArrayElement) { + return NoIndexesColumnIndexSupplier.getInstance(); } return null; } @@ -997,13 +1040,12 @@ public ColumnIndexSupplier getColumnIndexSupplier(List path) @Override public boolean isNumeric(List path) { - final TKeyDictionary fields = fieldsSupplier.get(); - final String field = getField(path); - final int fieldIndex = fields.indexOf(StringUtils.toUtf8ByteBuffer(field)); - if (fieldIndex < 0) { - return true; + final Field field = getNestedFieldOrNestedArrayElementFromPath(path); + if (field instanceof NestedField) { + final NestedField nestedField = (NestedField) field; + return getColumnHolder(nestedField.fieldName, nestedField.fieldIndex).getCapabilities().isNumeric(); } - return getColumnHolder(field, fieldIndex).getCapabilities().isNumeric(); + return true; } @SuppressWarnings("unchecked") @@ -1206,4 +1248,94 @@ public int compare(Object o1, Object o2) return Integer.compare(((Number) o1).intValue(), ((Number) o2).intValue()); } } + + private List getAllNestedFields() + { + TKeyDictionary fields = fieldsSupplier.get(); + List allFields = new ArrayList<>(fields.size()); + for (int i = 0; i < fields.size(); i++) { + String field = StringUtils.fromUtf8(fields.get(i)); + allFields.add(new NestedField(field, i)); + } + return allFields; + } + + private List>> getAllParsedNestedFields() + { + TKeyDictionary fields = fieldsSupplier.get(); + List>> allFields = new ArrayList<>(fields.size()); + for (int i = 0; i < fields.size(); i++) { + String field = StringUtils.fromUtf8(fields.get(i)); + allFields.add(Pair.of(new NestedField(field, i), parsePath(field))); + } + return allFields; + } + + /** + * Returns a representation of a field or array element within a nested object structure, given a path. + *
+ * Returns null if the path does not correspond to any field or array element. + */ + @Nullable + private Field getNestedFieldOrNestedArrayElementFromPath(List path) + { + TKeyDictionary fields = fieldsSupplier.get(); + List> parsed = new ArrayList<>(fields.size()); + for (int i = 0; i < fields.size(); i++) { + String field = StringUtils.fromUtf8(fields.get(i)); + parsed.add(parsePath(field)); + if (parsed.get(i).equals(path)) { + return new NestedField(field, i); + } + } + if (!path.isEmpty() && path.get(path.size() - 1) instanceof NestedPathArrayElement) { + List arrayPath = path.subList(0, path.size() - 1); + for (int i = 0; i < fields.size(); i++) { + if (parsed.get(i).equals(arrayPath)) { + return new NestedArrayElement( + new NestedField(StringUtils.fromUtf8(fields.get(i)), i), + ((NestedPathArrayElement) path.get(path.size() - 1)).getIndex() + ); + } + } + } + return null; + } + + /** + * Represents a single target element within a nested object structure. + */ + interface Field + { + } + + /** + * Represents a field located within a nested object hierarchy, could be scalar or array. + */ + private static class NestedField implements Field + { + private final String fieldName; + private final int fieldIndex; + + NestedField(String fieldName, int fieldIndex) + { + this.fieldName = fieldName; + this.fieldIndex = fieldIndex; + } + } + + /** + * Represents an element located within an array field inside a nested object hierarchy. + */ + private static class NestedArrayElement implements Field + { + private final NestedField nestedField; + private final int elementNumber; + + NestedArrayElement(NestedField nestedField, int elementNumber) + { + this.nestedField = nestedField; + this.elementNumber = elementNumber; + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java index 157750b3c7b2..bae670f05545 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java @@ -142,7 +142,7 @@ public ProcessedValue processArrayField( private FixedIndexedWriter longDictionaryWriter; private FixedIndexedWriter doubleDictionaryWriter; private FrontCodedIntArrayIndexedWriter arrayDictionaryWriter; - private CompressedVariableSizedBlobColumnSerializer rawWriter; + @Nullable private CompressedVariableSizedBlobColumnSerializer rawWriter; private ByteBufferWriter nullBitmapWriter; private MutableBitmap nullRowsBitmap; private Map> fieldWriters; @@ -243,12 +243,19 @@ public void openDictionaryWriter(File segmentBaseDir) throws IOException @Override public void open() throws IOException { - rawWriter = new CompressedVariableSizedBlobColumnSerializer( - ColumnSerializerUtils.getInternalFileName(name, RAW_FILE_NAME), - segmentWriteOutMedium, - columnFormatSpec.getObjectStorageCompression() - ); - rawWriter.open(); + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + rawWriter = null; + } else if (ObjectStorageEncoding.SMILE.equals(columnFormatSpec.getObjectStorageEncoding())) { + rawWriter = new CompressedVariableSizedBlobColumnSerializer( + ColumnSerializerUtils.getInternalFileName(name, RAW_FILE_NAME), + segmentWriteOutMedium, + columnFormatSpec.getObjectStorageCompression() + ); + rawWriter.open(); + } else { + throw DruidException.defensive("Unknown object storage encoding [%s]", 
columnFormatSpec.getObjectStorageEncoding() + ); + } nullBitmapWriter = new ByteBufferWriter<>( segmentWriteOutMedium, @@ -339,7 +346,9 @@ public void serialize(StructuredData data) throws IOException if (data == null) { nullRowsBitmap.add(rowCount); } - rawWriter.addValue(NestedDataComplexTypeSerde.INSTANCE.toBytes(data)); + if (rawWriter != null) { + rawWriter.addValue(NestedDataComplexTypeSerde.INSTANCE.toBytes(data)); + } if (data != null) { fieldProcessor.processFields(data.getValue()); } @@ -410,7 +419,9 @@ public void writeTo( writeInternal(smoosher, arrayDictionaryWriter, ColumnSerializerUtils.ARRAY_DICTIONARY_FILE_NAME); } } - writeInternal(smoosher, rawWriter, RAW_FILE_NAME); + if (rawWriter != null) { + writeInternal(smoosher, rawWriter, RAW_FILE_NAME); + } if (!nullRowsBitmap.isEmpty()) { writeInternal(smoosher, nullBitmapWriter, ColumnSerializerUtils.NULL_BITMAP_FILE_NAME); } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java index 204f8d2b3ba8..816c7fe5f140 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java @@ -75,7 +75,6 @@ public static NestedDataColumnSupplier read( final Supplier arrayDictionarySupplier; - if (parent != null) { fieldsSupplier = parent.fieldSupplier; fieldInfo = parent.fieldInfo; @@ -130,22 +129,23 @@ public static NestedDataColumnSupplier read( ); } - final ByteBuffer rawBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile( mapper, columnName, NestedCommonFormatColumnSerializer.RAW_FILE_NAME ); - compressedRawColumnSupplier = CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( - ColumnSerializerUtils.getInternalFileName( - columnName, - NestedCommonFormatColumnSerializer.RAW_FILE_NAME - ), - rawBuffer, - byteOrder, - byteOrder, // byte order doesn't matter since serde is byte blobs - mapper - ); + compressedRawColumnSupplier = rawBuffer == null + ? 
null + : CompressedVariableSizedBlobColumnSupplier.fromByteBuffer( + ColumnSerializerUtils.getInternalFileName( + columnName, + NestedCommonFormatColumnSerializer.RAW_FILE_NAME + ), + rawBuffer, + byteOrder, + byteOrder, // byte order doesn't matter since serde is byte blobs + mapper + ); if (hasNulls) { columnBuilder.setHasNulls(true); final ByteBuffer nullIndexBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile( @@ -186,6 +186,7 @@ public static NestedDataColumnSupplier read( private final String columnName; private final Supplier> fieldSupplier; private final FieldTypeInfo fieldInfo; + @Nullable private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; private final ImmutableBitmap nullValues; private final Supplier> stringDictionarySupplier; @@ -204,7 +205,7 @@ private NestedDataColumnSupplier( String columnName, Supplier> fieldSupplier, FieldTypeInfo fieldInfo, - CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + @Nullable CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, ImmutableBitmap nullValues, Supplier> stringDictionarySupplier, Supplier> longDictionarySupplier, diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java index bdc90b9d9b4a..2a375538422c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java @@ -31,6 +31,7 @@ import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.serde.ColumnSerializerUtils; +import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.List; @@ -51,7 +52,7 @@ public NestedDataColumnV5( String columnName, ColumnType logicalType, ColumnConfig columnConfig, - CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + @Nullable CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, ImmutableBitmap nullValues, Supplier fields, FieldTypeInfo fieldInfo, diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java index 7f584a980698..17d782785a8e 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java @@ -20,7 +20,7 @@ package org.apache.druid.segment.nested; -import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.BaseColumnHolder; import org.apache.druid.segment.column.ComplexColumn; import javax.annotation.Nullable; @@ -40,10 +40,10 @@ public abstract class NestedDataComplexColumn implements NestedVectorColumnSelectorFactory { /** - * Get a {@link ColumnHolder} for a nested field column to retrieve metadata, the column itself, or indexes. + * Get a {@link BaseColumnHolder} for a nested field column to retrieve metadata, the column itself, or indexes. 
*/ @Nullable - public abstract ColumnHolder getColumnHolder(List path); + public abstract BaseColumnHolder getColumnHolder(List path); @Override public Class getClazz() diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java index 11b21adbf9d2..261da5ddfc73 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java @@ -169,6 +169,23 @@ public String lookupName(int id) return null; } + public Object lookupObject(int id) + { + final int globalId = dictionary.get(id); + if (globalId < adjustArrayId) { + return lookupGlobalScalarObject(globalId); + } + int[] arr = globalArrayDictionary.get(globalId - adjustArrayId); + if (arr == null) { + return null; + } + final Object[] array = new Object[arr.length]; + for (int i = 0; i < arr.length; i++) { + array[i] = lookupGlobalScalarObject(arr[i]); + } + return array; + } + @Override public int lookupId(String name) { @@ -979,6 +996,7 @@ private void computeVectorsIfNeeded() @Nullable private PeekableIntIterator nullIterator = nullBitmap != null ? nullBitmap.peekableIterator() : null; private int offsetMark = -1; + @Override public double[] getDoubleVector() { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ObjectStorageEncoding.java b/processing/src/main/java/org/apache/druid/segment/nested/ObjectStorageEncoding.java index 3b1c823dbe44..2f21cbba64b9 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ObjectStorageEncoding.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ObjectStorageEncoding.java @@ -25,6 +25,7 @@ public enum ObjectStorageEncoding { + NONE, SMILE; @JsonValue diff --git a/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataBuilder.java b/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataBuilder.java new file mode 100644 index 000000000000..584ca24d18c0 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataBuilder.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.nested; + +import com.google.common.collect.LinkedListMultimap; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import org.apache.druid.error.DruidException; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class StructuredDataBuilder +{ + + private final List elements; + + StructuredDataBuilder(StructuredDataBuilder.Element... elements) + { + this(List.of(elements)); + } + + StructuredDataBuilder(List elements) + { + this.elements = elements; + } + + /** + * Creates a StructuredDataBuilder from a list of paths and corresponding objects. + */ + StructuredDataBuilder(List> parts, List objects) + { + List elements = new ArrayList<>(); + for (int i = 0; i < parts.size(); i++) { + elements.add(Element.of(parts.get(i), objects.get(i))); + } + this.elements = elements; + } + + public StructuredData build() + { + Object subtree = buildObject(); + return StructuredData.wrap(subtree == null ? Map.of() : subtree); + } + + @Nullable + private Object buildObject() + { + Object simpleObject = null; + Multimap map = LinkedListMultimap.create(); + ArrayList> list = new ArrayList<>(); + + for (Element element : elements) { + if (element.getValue() == null) { + // we can't distinguish between null and missing values in structured data + continue; + } + + if (element.endOfPath()) { + simpleObject = element.getValue(); + continue; + } + + NestedPathPart currentPath = element.getCurrentPath(); + if (currentPath instanceof NestedPathField) { + map.put(((NestedPathField) currentPath).getField(), element.next()); + } else if (currentPath instanceof NestedPathArrayElement) { + int index = ((NestedPathArrayElement) currentPath).getIndex(); + while (list.size() <= index) { + list.add(new ArrayList<>()); + } + list.get(index).add(element.next()); + } + } + + if (simpleObject != null) { + if (!(map.isEmpty() && list.isEmpty())) { + throw DruidException.defensive( + "Error building structured data from paths[%s], cannot have map or array elements when root value is set", + elements + ); + } + return simpleObject; + } else if (!map.isEmpty()) { + if (!list.isEmpty()) { + throw DruidException.defensive( + "Error building structured data from paths[%s], cannot have both map and array elements at the same level", + elements + ); + } + return Maps.transformValues( + map.asMap(), + (mapElements) -> new StructuredDataBuilder(new ArrayList<>(mapElements)).buildObject() + ); + } else if (!list.isEmpty()) { + List resultList = new ArrayList<>(list.size()); + for (List elementList : list) { + resultList.add(new StructuredDataBuilder(elementList).buildObject()); + } + return resultList; + } + return null; + } + + public static class Element + { + final List path; + @Nullable + final Object value; + final int depth; + + Element(List path, Object value, int depth) + { + this.path = path; + this.value = value; + this.depth = depth; + } + + static Element of(List path, Object value) + { + return new Element(path, value, 0); + } + + @Nullable + Object getValue() + { + return value; + } + + NestedPathPart getCurrentPath() + { + return path.get(depth); + } + + boolean endOfPath() + { + return path.size() == depth; + } + + Element next() + { + return new Element(path, value, depth + 1); + } + + @Override + public String toString() + { + return "Element{" + + "path=" + path + + ", value=" + value + + ", depth=" + depth + + '}'; + } + } +} diff --git 
a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index d41a51f2ea8d..63280ac9eb10 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -27,6 +27,7 @@ import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.ResourceInputSource; import org.apache.druid.data.input.impl.DelimitedInputFormat; +import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.data.input.impl.TimestampSpec; @@ -39,6 +40,7 @@ import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.segment.AutoTypeColumnSchema; +import org.apache.druid.segment.DefaultColumnFormatConfig; import org.apache.druid.segment.IncrementalIndexSegment; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.IndexSpec; @@ -49,6 +51,7 @@ import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.column.StringEncodingStrategy; import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.timeline.SegmentId; @@ -66,6 +69,7 @@ import java.util.Collections; import java.util.List; import java.util.function.BiFunction; +import java.util.stream.Collectors; public class NestedDataTestUtils { @@ -91,34 +95,15 @@ public class NestedDataTestUtils DimensionsSpec.builder() .useSchemaDiscovery(true) .build(); + private static final List COLUMN_NAMES = Arrays.asList( + "dim", + "nest_json", + "nester_json", + "variant_json", + "list_json", + "nonexistent" + ); - public static final DimensionsSpec TSV_SCHEMA = - DimensionsSpec.builder() - .setDimensions( - Arrays.asList( - AutoTypeColumnSchema.of("dim"), - AutoTypeColumnSchema.of("nest_json"), - AutoTypeColumnSchema.of("nester_json"), - AutoTypeColumnSchema.of("variant_json"), - AutoTypeColumnSchema.of("list_json"), - AutoTypeColumnSchema.of("nonexistent") - ) - ) - .build(); - - public static final DimensionsSpec TSV_NESTED_SCHEMA = - DimensionsSpec.builder() - .setDimensions( - Arrays.asList( - new NestedDataColumnSchema("dim", 5), - new NestedDataColumnSchema("nest_json", 5), - new NestedDataColumnSchema("nester_json", 5), - new NestedDataColumnSchema("variant_json", 5), - new NestedDataColumnSchema("list_json", 5), - new NestedDataColumnSchema("nonexistent", 5) - ) - ) - .build(); public static final InputRowSchema AUTO_SCHEMA = new InputRowSchema( TIMESTAMP_SPEC, AUTO_DISCOVERY, @@ -163,30 +148,41 @@ public class NestedDataTestUtils public static List createSimpleSegmentsTsv( TemporaryFolder tempFolder, + NestedCommonFormatColumnFormatSpec spec, Closer closer ) throws Exception { + List dimensionsSpecs = + COLUMN_NAMES.stream() + .map(name -> (DimensionSchema) new AutoTypeColumnSchema(name, null, spec)) + .collect(Collectors.toList()); return createSimpleNestedTestDataTsvSegments( tempFolder, closer, Granularities.NONE, - TSV_SCHEMA, + DimensionsSpec.builder().setDimensions(dimensionsSpecs).build(), true ); } public static List createSimpleSegmentsTsvNested( TemporaryFolder tempFolder, + 
NestedCommonFormatColumnFormatSpec spec, Closer closer ) throws Exception { + DefaultColumnFormatConfig config = new DefaultColumnFormatConfig(null, null, null); + List dimensionsSpecs = + COLUMN_NAMES.stream() + .map(name -> (DimensionSchema) new NestedDataColumnSchema(name, 5, spec, config)) + .collect(Collectors.toList()); return createSimpleNestedTestDataTsvSegments( tempFolder, closer, Granularities.NONE, - TSV_NESTED_SCHEMA, + DimensionsSpec.builder().setDimensions(dimensionsSpecs).build(), true ); } diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index e297cedc425b..645b4f2840a0 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -22,6 +22,9 @@ import com.fasterxml.jackson.databind.Module; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import junitparams.JUnitParamsRunner; +import junitparams.Parameters; +import junitparams.naming.TestCaseName; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.guice.BuiltInTypesModule; import org.apache.druid.java.util.common.Intervals; @@ -46,6 +49,8 @@ import org.apache.druid.segment.Segment; import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; +import org.apache.druid.segment.nested.ObjectStorageEncoding; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -54,11 +59,13 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; import java.io.IOException; import java.util.Collections; import java.util.List; +@RunWith(JUnitParamsRunner.class) public class NestedDataScanQueryTest extends InitializedNullHandlingTest { private static final Logger LOG = new Logger(NestedDataScanQueryTest.class); @@ -69,6 +76,18 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); + public static Object[] getNestedColumnFormatSpec() + { + return new Object[]{ + new Object[]{"default", null}, + new Object[]{ + "noneObjectStorageEncoding", + NestedCommonFormatColumnFormatSpec.builder() + .setObjectStorageEncoding(ObjectStorageEncoding.NONE).build() + } + }; + } + @After public void teardown() throws IOException { @@ -267,7 +286,9 @@ public void testIngestAndScanSegmentsRealtimeWithFallback() throws Exception } @Test - public void testIngestAndScanSegmentsTsvV4() throws Exception + @Parameters(method = "getNestedColumnFormatSpec") + @TestCaseName("{0}") + public void testIngestAndScanSegmentsTsvV4(String name, NestedCommonFormatColumnFormatSpec spec) throws Exception { Query scanQuery = Druids.newScanQueryBuilder() .dataSource("test_datasource") @@ -285,7 +306,7 @@ public void testIngestAndScanSegmentsTsvV4() throws Exception .limit(100) .context(ImmutableMap.of()) .build(); - List segs = NestedDataTestUtils.createSimpleSegmentsTsvNested(tempFolder, closer); + List segs = NestedDataTestUtils.createSimpleSegmentsTsvNested(tempFolder, spec, closer); final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); @@ -295,9 +316,10 
@@ public void testIngestAndScanSegmentsTsvV4() throws Exception logResults(results); } - @Test - public void testIngestAndScanSegmentsTsv() throws Exception + @Parameters(method = "getNestedColumnFormatSpec") + @TestCaseName("{0}") + public void testIngestAndScanSegmentsTsv(String name, NestedCommonFormatColumnFormatSpec spec) throws Exception { Query scanQuery = Druids.newScanQueryBuilder() .dataSource("test_datasource") @@ -315,7 +337,7 @@ public void testIngestAndScanSegmentsTsv() throws Exception .limit(100) .context(ImmutableMap.of()) .build(); - List segs = NestedDataTestUtils.createSimpleSegmentsTsv(tempFolder, closer); + List segs = NestedDataTestUtils.createSimpleSegmentsTsv(tempFolder, spec, closer); final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 39109f5ed049..9338a8415bac 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -170,11 +170,15 @@ public static Collection constructorFeeder() .setLongColumnCompression(CompressionStrategy.LZF) .setDoubleColumnCompression(CompressionStrategy.LZF) .build(); + + NestedCommonFormatColumnFormatSpec noRawStorage = + NestedCommonFormatColumnFormatSpec.builder().setObjectStorageEncoding(ObjectStorageEncoding.NONE).build(); final List constructors = ImmutableList.of( new Object[]{defaultSpec}, new Object[]{frontCodedKeysAndDicts}, new Object[]{zstdRaw}, - new Object[]{lzf} + new Object[]{lzf}, + new Object[]{noRawStorage} ); return constructors; @@ -455,11 +459,22 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException Assert.assertEquals(ImmutableList.of(nullishPath, vPath, xPath, yPath, zPath), column.getNestedFields()); for (int i = 0; i < DATA.size(); i++) { - Map row = DATA.get(i); + final Map row; + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + // if raw object is not stored, the derived object will have sorted key and no nulls + row = new TreeMap<>(DATA.get(i)); + row.entrySet().removeIf(entry -> entry.getValue() == null); + } else { + row = DATA.get(i); + } Assert.assertEquals( JSON_MAPPER.writeValueAsString(row), JSON_MAPPER.writeValueAsString(StructuredData.unwrap(rawSelector.getObject())) ); + Assert.assertEquals( + JSON_MAPPER.writeValueAsString(row), + JSON_MAPPER.writeValueAsString(StructuredData.unwrap(column.getRowValue(i))) + ); testPath(row, i, "v", vSelector, vDimSelector, vValueIndex, vPredicateIndex, vNulls, null); testPath(row, i, "x", xSelector, xDimSelector, xValueIndex, xPredicateIndex, xNulls, ColumnType.LONG); @@ -594,11 +609,22 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException int rowCounter = 0; while (offset.withinBounds()) { - Map row = ARRAY_TEST_DATA.get(rowCounter); + final Map row; + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + // if raw object is not stored, the derived object will have sorted key and no nulls + row = new TreeMap<>(ARRAY_TEST_DATA.get(rowCounter)); + row.entrySet().removeIf(entry -> entry.getValue() == null); + } else { + row = ARRAY_TEST_DATA.get(rowCounter); + } Assert.assertEquals( JSON_MAPPER.writeValueAsString(row), 
JSON_MAPPER.writeValueAsString(StructuredData.unwrap(rawSelector.getObject())) ); + Assert.assertEquals( + JSON_MAPPER.writeValueAsString(row), + JSON_MAPPER.writeValueAsString(StructuredData.unwrap(column.getRowValue(rowCounter))) + ); Object[] s = (Object[]) row.get("s"); Object[] l = (Object[]) row.get("l"); @@ -650,7 +676,14 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException for (int i = 0; i < vectorOffset.getCurrentVectorSize(); i++, rowCounter++) { - Map row = ARRAY_TEST_DATA.get(rowCounter); + final Map row; + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + // if raw object is not stored, the derived object will have sorted key and no nulls + row = new TreeMap<>(ARRAY_TEST_DATA.get(rowCounter)); + row.entrySet().removeIf(entry -> entry.getValue() == null); + } else { + row = ARRAY_TEST_DATA.get(rowCounter); + } Assert.assertEquals( JSON_MAPPER.writeValueAsString(row), JSON_MAPPER.writeValueAsString(StructuredData.unwrap(rawVector[i])) @@ -699,7 +732,14 @@ private void smokeTestArrays(NestedDataComplexColumn column) throws IOException final boolean[] dElementNulls = dElementFilteredVectorSelector.getNullVector(); for (int i = 0; i < bitmapVectorOffset.getCurrentVectorSize(); i++, rowCounter += 2) { - Map row = ARRAY_TEST_DATA.get(rowCounter); + final Map row; + if (ObjectStorageEncoding.NONE.equals(columnFormatSpec.getObjectStorageEncoding())) { + // if raw object is not stored, the derived object will have sorted key and no nulls + row = new TreeMap<>(ARRAY_TEST_DATA.get(rowCounter)); + row.entrySet().removeIf(entry -> entry.getValue() == null); + } else { + row = ARRAY_TEST_DATA.get(rowCounter); + } Assert.assertEquals( JSON_MAPPER.writeValueAsString(row), JSON_MAPPER.writeValueAsString(StructuredData.unwrap(rawVector[i])) diff --git a/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataBuilderTest.java b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataBuilderTest.java new file mode 100644 index 000000000000..fa49e1104dc6 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataBuilderTest.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.nested; + +import org.apache.druid.error.DruidException; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class StructuredDataBuilderTest +{ + @Test + public void testBuildSingleDepth() + { + Object[] array = new Object[]{1, 2}; + StructuredDataBuilder.Element childArrayElement = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(1)), + array, + 1 + ); + Assert.assertEquals(StructuredData.wrap(array), new StructuredDataBuilder(childArrayElement).build()); + + // [null, [1, 2]] + StructuredDataBuilder.Element arrayElement = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(1)), + array, + 0 + ); + Assert.assertEquals( + StructuredData.wrap(Arrays.asList(null, array)), + new StructuredDataBuilder(arrayElement).build() + ); + + StructuredDataBuilder.Element nullElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("y")), + null, + 0 + ); + Assert.assertEquals(StructuredData.wrap(Map.of()), new StructuredDataBuilder(nullElement).build()); + + // {"x": "hi"} + StructuredDataBuilder.Element mapElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x")), + "hi", + 0 + ); + Assert.assertEquals( + StructuredData.wrap(Map.of("x", "hi")), + new StructuredDataBuilder(mapElement, nullElement).build() + ); + } + + @Test + public void testBuildRootPath() + { + // "root-val" + StructuredDataBuilder.Element rootElement = new StructuredDataBuilder.Element( + List.of(), + "root-val", + 0 + ); + Assert.assertEquals(StructuredData.wrap("root-val"), new StructuredDataBuilder(rootElement).build()); + } + + @Test + public void testBuildArrayMultipleDepths() + { + // [[1], [null, [2]]] + Object[] array = new Object[]{2}; + StructuredDataBuilder.Element element1 = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(0), new NestedPathArrayElement(0)), + 1, + 0 + ); + StructuredDataBuilder.Element element2 = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(1), new NestedPathArrayElement(1)), + array, + 0 + ); + List expected = List.of(List.of(1), Arrays.asList(null, array)); + Assert.assertEquals(StructuredData.wrap(expected), new StructuredDataBuilder(element1, element2).build()); + } + + @Test + public void testBuildMapMultipleDepths() + { + // {"x": {"y": "hi-xy", "z": "hi-xz"}, "yz": {"z": "hi-yz"}} + StructuredDataBuilder.Element xyElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("y")), + "hi-xy", + 0 + ); + StructuredDataBuilder.Element xzElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("z")), + "hi-xz", + 0 + ); + StructuredDataBuilder.Element yzElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("yz"), new NestedPathField("z")), + "hi-yz", + 0 + ); + Map expected = Map.of("x", Map.of("y", "hi-xy", "z", "hi-xz"), "yz", Map.of("z", "hi-yz")); + Assert.assertEquals( + StructuredData.wrap(expected), + new StructuredDataBuilder(xyElement, xzElement, yzElement).build() + ); + } + + @Test + public void testBuildMixedMultipleDepths() + { + // {"x": {"y": "hi-xy", "array": ["hi-x-array-0", null, "hi-x-array-2"]}} + StructuredDataBuilder.Element xyElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("y")), + "hi-xy", + 0 + ); + StructuredDataBuilder.Element xArray = new 
StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("array"), new NestedPathArrayElement(0)), + "hi-x-array-0", + 0 + ); + StructuredDataBuilder.Element xArray2 = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x"), new NestedPathField("array"), new NestedPathArrayElement(2)), + "hi-x-array-2", + 0 + ); + + Map expected = Map.of( + "x", + Map.of("y", "hi-xy", "array", Arrays.asList("hi-x-array-0", null, "hi-x-array-2")) + ); + Assert.assertEquals(StructuredData.wrap(expected), new StructuredDataBuilder(xyElement, xArray, xArray2).build()); + } + + @Test + public void testBuildExceptions() + { + StructuredDataBuilder.Element rootElement = new StructuredDataBuilder.Element( + List.of(), + "root-val", + 0 + ); + StructuredDataBuilder.Element mapElement = new StructuredDataBuilder.Element( + List.of(new NestedPathField("x")), + "hi", + 0 + ); + StructuredDataBuilder.Element arrayElement = new StructuredDataBuilder.Element( + List.of(new NestedPathArrayElement(0)), + 1, + 0 + ); + DruidException e1 = Assert.assertThrows( + DruidException.class, + () -> new StructuredDataBuilder(rootElement, mapElement).build() + ); + Assert.assertEquals( + "Error building structured data from paths[[Element{path=[], value=root-val, depth=0}, Element{path=[NestedPathField{field='x'}], value=hi, depth=0}]], " + + "cannot have map or array elements when root value is set", + e1.getMessage() + ); + DruidException e2 = Assert.assertThrows( + DruidException.class, + () -> new StructuredDataBuilder(rootElement, arrayElement).build() + ); + Assert.assertEquals( + "Error building structured data from paths[[Element{path=[], value=root-val, depth=0}, Element{path=[NestedPathArrayElement{index=0}], value=1, depth=0}]], " + + "cannot have map or array elements when root value is set", + e2.getMessage() + ); + DruidException e3 = Assert.assertThrows( + DruidException.class, + () -> new StructuredDataBuilder(mapElement, arrayElement).build() + ); + Assert.assertEquals( + "Error building structured data from paths[[Element{path=[NestedPathField{field='x'}], value=hi, depth=0}, Element{path=[NestedPathArrayElement{index=0}], value=1, depth=0}]], " + + "cannot have both map and array elements at the same level", + e3.getMessage() + ); + } +} diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 97db549de27f..1325dc933731 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -64,7 +64,9 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; import org.apache.druid.segment.nested.NestedPathField; +import org.apache.druid.segment.nested.ObjectStorageEncoding; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; @@ -79,7 +81,9 @@ import org.junit.Assert; import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.mockito.Mockito; import 
java.util.Arrays; import java.util.Collections; @@ -89,7 +93,7 @@ import static org.hamcrest.MatcherAssert.assertThat; @SqlTestFrameworkConfig.ComponentSupplier(NestedComponentSupplier.class) -public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest +public abstract class CalciteNestedDataQueryTest extends BaseCalciteQueryTest { public static final String DATA_SOURCE = "nested"; public static final String DATA_SOURCE_MIXED = "nested_mix"; @@ -103,7 +107,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .put("t", "2000-01-01") .put("string", "aaa") .put("string_sparse", "zzz") - .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "300", "mixed", 1L, "mixed2", "1")) + .put("nest", ImmutableMap.of("mixed", 1L, "mixed2", "1", "x", 100L, "y", 2.02, "z", "300")) .put( "nester", ImmutableMap.of("array", ImmutableList.of("a", "b"), "n", ImmutableMap.of("x", "hello")) @@ -120,7 +124,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .put("t", "2000-01-01") .put("string", "ccc") .put("string_sparse", "10") - .put("nest", ImmutableMap.of("x", 200L, "y", 3.03, "z", "abcdef", "mixed", 1.1, "mixed2", 1L)) + .put("nest", ImmutableMap.of("mixed", 1.1, "mixed2", 1L, "x", 200L, "y", 3.03, "z", "abcdef")) .put("long", 3L) .build(), ImmutableMap.builder() @@ -138,7 +142,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ImmutableMap.builder() .put("t", "2000-01-02") .put("string", "aaa") - .put("nest", ImmutableMap.of("x", 100L, "y", 2.02, "z", "400", "mixed2", 1.1)) + .put("nest", ImmutableMap.of("mixed2", 1.1, "x", 100L, "y", 2.02, "z", "400")) .put("nester", ImmutableMap.of("array", ImmutableList.of("a", "b"), "n", ImmutableMap.of("x", 1L))) .put("long", 5L) .build(), @@ -150,41 +154,80 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ); - public static final InputRowSchema ALL_JSON_COLUMNS = new InputRowSchema( - new TimestampSpec("t", "iso", null), - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(AutoTypeColumnSchema.of("string")) - .add(AutoTypeColumnSchema.of("nest")) - .add(AutoTypeColumnSchema.of("nester")) - .add(AutoTypeColumnSchema.of("long")) - .add(AutoTypeColumnSchema.of("string_sparse")) - .build() - ).build(), - null - ); + @Nested + public static class DefaultCalciteNestedDataQueryTest extends CalciteNestedDataQueryTest + { + } - public static final InputRowSchema JSON_AND_SCALAR_MIX = new InputRowSchema( - new TimestampSpec("t", "iso", null), - DimensionsSpec.builder().setDimensions( - ImmutableList.builder() - .add(new StringDimensionSchema("string")) - .add(AutoTypeColumnSchema.of("nest")) - .add(AutoTypeColumnSchema.of("nester")) - .add(new LongDimensionSchema("long")) - .add(new StringDimensionSchema("string_sparse")) - .build() - ).build(), - null - ); - public static final List ROWS = - RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, ALL_JSON_COLUMNS)).collect(Collectors.toList()); + @Nested + public static class NoneObjectStorageCalciteNestedDataQueryTest extends CalciteNestedDataQueryTest + { + public NoneObjectStorageCalciteNestedDataQueryTest() + { + super(); + // Override with none object storage + NestedCommonFormatColumnFormatSpec noneObjectStorage = + NestedCommonFormatColumnFormatSpec.builder().setObjectStorageEncoding(ObjectStorageEncoding.NONE).build(); + Mockito.when(ALL_JSON_COLUMNS.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new 
AutoTypeColumnSchema("string", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("nest", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("nester", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("long", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("string_sparse", null, noneObjectStorage)) + .build() + ).build()); + Mockito.when(JSON_AND_SCALAR_MIX.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new StringDimensionSchema("string")) + .add(new AutoTypeColumnSchema("nest", null, noneObjectStorage)) + .add(new AutoTypeColumnSchema("nester", null, noneObjectStorage)) + .add(new LongDimensionSchema("long")) + .add(new StringDimensionSchema("string_sparse")) + .build() + ).build()); + } + } + + public static final InputRowSchema ALL_JSON_COLUMNS = Mockito.mock(InputRowSchema.class); - public static final List ROWS_MIX = - RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, JSON_AND_SCALAR_MIX)).collect(Collectors.toList()); + public static final InputRowSchema JSON_AND_SCALAR_MIX = Mockito.mock(InputRowSchema.class); + + public static List constructInputRows(InputRowSchema inputRowSchema) + { + return RAW_ROWS.stream().map(raw -> TestDataBuilder.createRow(raw, inputRowSchema)).collect(Collectors.toList()); + } public static class NestedComponentSupplier extends StandardComponentSupplier { + static { + Mockito.when(ALL_JSON_COLUMNS.getTimestampSpec()).thenReturn( + new TimestampSpec("t", "iso", null)); + Mockito.when(ALL_JSON_COLUMNS.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(AutoTypeColumnSchema.of("string")) + .add(AutoTypeColumnSchema.of("nest")) + .add(AutoTypeColumnSchema.of("nester")) + .add(AutoTypeColumnSchema.of("long")) + .add(AutoTypeColumnSchema.of("string_sparse")) + .build() + ).build()); + Mockito.when(JSON_AND_SCALAR_MIX.getTimestampSpec()).thenReturn(new TimestampSpec("t", "iso", null)); + Mockito.when(JSON_AND_SCALAR_MIX.getDimensionsSpec()).thenReturn( + DimensionsSpec.builder().setDimensions( + ImmutableList.builder() + .add(new StringDimensionSchema("string")) + .add(AutoTypeColumnSchema.of("nest")) + .add(AutoTypeColumnSchema.of("nester")) + .add(new LongDimensionSchema("long")) + .add(new StringDimensionSchema("string_sparse")) + .build() + ).build()); + } + public NestedComponentSupplier(TempDirProducer tempFolderProducer) { super(tempFolderProducer); @@ -207,7 +250,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS) + .rows(constructInputRows(ALL_JSON_COLUMNS)) .buildMMappedIndex(); final QueryableIndex indexMix11 = @@ -223,7 +266,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS) + .rows(constructInputRows(ALL_JSON_COLUMNS)) .buildMMappedIndex(); @@ -240,7 +283,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS_MIX) + .rows(constructInputRows(JSON_AND_SCALAR_MIX)) .buildMMappedIndex(); final QueryableIndex indexMix21 = @@ -256,7 +299,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu .withRollup(false) .build() ) - .rows(ROWS_MIX) + .rows(constructInputRows(JSON_AND_SCALAR_MIX)) .buildMMappedIndex(); final QueryableIndex indexMix22 = @@ -272,7 +315,7 @@ public SpecificSegmentsQuerySegmentWalker addSegmentsToWalker(SpecificSegmentsQu 
.withRollup(false) .build() ) - .rows(ROWS) + .rows(constructInputRows(ALL_JSON_COLUMNS)) .buildMMappedIndex(); final QueryableIndex indexArrays = @@ -582,7 +625,12 @@ public void testGroupByOnNestedColumn() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) @@ -611,7 +659,12 @@ public void testGroupByOnNestedColumnWithOrderBy() .setInterval(querySegmentSpec(Filtration.eternity())) .setGranularity(Granularities.ALL) .setVirtualColumns( - new ExpressionVirtualColumn("v0", "strlen(\"string\")", ColumnType.LONG, queryFramework().macroTable()) + new ExpressionVirtualColumn( + "v0", + "strlen(\"string\")", + ColumnType.LONG, + queryFramework().macroTable() + ) ) .setDimensions(dimensions(new DefaultDimensionSpec("nester", "d0", ColumnType.NESTED_DATA))) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "v0"))) @@ -1163,7 +1216,12 @@ public void testJsonValueArrays() new NestedFieldVirtualColumn("arrayNestedLong", "$[0]", "v3", ColumnType.LONG_ARRAY) ) .columns("v0", "v1", "v2", "v3") - .columnTypes(ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.LONG_ARRAY) + .columnTypes( + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.LONG_ARRAY + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ) @@ -1669,23 +1727,23 @@ public void testGroupByRootSingleTypeArrayLongNullsUnnest() .queryContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) .expectedQuery( GroupByQuery.builder() - .setDataSource( - UnnestDataSource.create( - TableDataSource.create(DATA_SOURCE_ARRAYS), - expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), - null - ) - ) - .setInterval(querySegmentSpec(Filtration.eternity())) - .setGranularity(Granularities.ALL) - .setDimensions( - dimensions( - new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG) - ) - ) - .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) - .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) - .build() + .setDataSource( + UnnestDataSource.create( + TableDataSource.create(DATA_SOURCE_ARRAYS), + expressionVirtualColumn("j0.unnest", "\"arrayLongNulls\"", ColumnType.LONG_ARRAY), + null + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("j0.unnest", "d0", ColumnType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() ) .expectedResults( ImmutableList.of( @@ -2753,7 +2811,7 @@ public void testJsonAndArrayAgg() ImmutableList.of( new Object[]{ "aaa", - "[{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"},{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}]", + "[{\"mixed\":1,\"mixed2\":\"1\",\"x\":100,\"y\":2.02,\"z\":\"300\"},{\"mixed2\":1.1,\"x\":100,\"y\":2.02,\"z\":\"400\"}]", 2L }, new Object[]{ @@ -2763,7 +2821,7 @@ public void testJsonAndArrayAgg() }, new Object[]{ "ccc", - "[{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}]", + 
"[{\"mixed\":1.1,\"mixed2\":1,\"x\":200,\"y\":3.03,\"z\":\"abcdef\"}]", 1L }, new Object[]{ @@ -4646,8 +4704,8 @@ public void testGroupByRootKeys2() ), ImmutableList.of( new Object[]{null, 4L}, - new Object[]{"[\"x\",\"y\",\"z\",\"mixed\",\"mixed2\"]", 2L}, - new Object[]{"[\"x\",\"y\",\"z\",\"mixed2\"]", 1L} + new Object[]{"[\"mixed\",\"mixed2\",\"x\",\"y\",\"z\"]", 2L}, + new Object[]{"[\"mixed2\",\"x\",\"y\",\"z\"]", 1L} ), RowSignature.builder() .add("EXPR$0", ColumnType.STRING_ARRAY) @@ -4912,9 +4970,7 @@ public void testJsonMerging() "nest", "v1", ColumnType.STRING, - ImmutableList.of( - new NestedPathField("x") - ), + ImmutableList.of(new NestedPathField("x")), false, null, false @@ -5020,7 +5076,12 @@ public void testToJsonAndParseJson() ) ) .columns("string", "v0", "v1", "v2") - .columnTypes(ColumnType.STRING, ColumnType.NESTED_DATA, ColumnType.NESTED_DATA, ColumnType.NESTED_DATA) + .columnTypes( + ColumnType.STRING, + ColumnType.NESTED_DATA, + ColumnType.NESTED_DATA, + ColumnType.NESTED_DATA + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ), @@ -5987,7 +6048,44 @@ public void testScanAllTypesAuto() "cObjectArray", "cnt" ) - .columnTypes(ColumnType.LONG, ColumnType.STRING, ColumnType.LONG, ColumnType.DOUBLE, ColumnType.LONG, ColumnType.STRING, ColumnType.DOUBLE, ColumnType.ofComplex("json"), ColumnType.LONG_ARRAY, ColumnType.STRING_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.STRING_ARRAY, ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.STRING, ColumnType.STRING, ColumnType.LONG, ColumnType.DOUBLE, ColumnType.ofComplex("json"), ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY, ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.LONG_ARRAY, ColumnType.ofComplex("json"), ColumnType.ofComplex("json"), ColumnType.LONG) + .columnTypes( + ColumnType.LONG, + ColumnType.STRING, + ColumnType.LONG, + ColumnType.DOUBLE, + ColumnType.LONG, + ColumnType.STRING, + ColumnType.DOUBLE, + ColumnType.ofComplex("json"), + ColumnType.LONG_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.STRING_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.STRING, + ColumnType.STRING, + ColumnType.LONG, + ColumnType.DOUBLE, + ColumnType.ofComplex("json"), + ColumnType.STRING_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.DOUBLE_ARRAY, + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.LONG_ARRAY, + ColumnType.ofComplex("json"), + ColumnType.ofComplex("json"), + ColumnType.LONG + ) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .build() ), @@ -6317,9 +6415,9 @@ public void testFilterJsonIsNotNull() .build() ), ImmutableList.of( - new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"300\",\"mixed\":1,\"mixed2\":\"1\"}"}, - new Object[]{"{\"x\":200,\"y\":3.03,\"z\":\"abcdef\",\"mixed\":1.1,\"mixed2\":1}"}, - new Object[]{"{\"x\":100,\"y\":2.02,\"z\":\"400\",\"mixed2\":1.1}"} + new Object[]{"{\"mixed\":1,\"mixed2\":\"1\",\"x\":100,\"y\":2.02,\"z\":\"300\"}"}, + new 
Object[]{"{\"mixed\":1.1,\"mixed2\":1,\"x\":200,\"y\":3.03,\"z\":\"abcdef\"}"}, + new Object[]{"{\"mixed2\":1.1,\"x\":100,\"y\":2.02,\"z\":\"400\"}"} ), RowSignature.builder() .add("nest", ColumnType.NESTED_DATA) @@ -6364,25 +6462,30 @@ public void testCoalesceOnNestedColumns() testBuilder() .sql( "select c,long,coalesce(c,long) as col " - + " from druid.all_auto, unnest(json_value(arrayNestedLong, '$[1]' returning bigint array)) as u(c) " + + " from druid.all_auto, unnest(json_value(arrayNestedLong, '$[1]' returning bigint array)) as u(c) " ) .expectedQueries( ImmutableList.of( Druids.newScanQueryBuilder() - .dataSource( - UnnestDataSource.create( - new TableDataSource(DATA_SOURCE_ALL), - new NestedFieldVirtualColumn("arrayNestedLong", "$[1]", "j0.unnest", ColumnType.LONG_ARRAY), - null - ) - ) - .virtualColumns(expressionVirtualColumn("v0", "nvl(\"j0.unnest\",\"long\")", ColumnType.LONG)) - .intervals(querySegmentSpec(Filtration.eternity())) - .columns("j0.unnest", "long", "v0") - .columnTypes(ColumnType.LONG, ColumnType.LONG, ColumnType.LONG) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .context(QUERY_CONTEXT_DEFAULT) - .build() + .dataSource( + UnnestDataSource.create( + new TableDataSource(DATA_SOURCE_ALL), + new NestedFieldVirtualColumn( + "arrayNestedLong", + "$[1]", + "j0.unnest", + ColumnType.LONG_ARRAY + ), + null + ) + ) + .virtualColumns(expressionVirtualColumn("v0", "nvl(\"j0.unnest\",\"long\")", ColumnType.LONG)) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("j0.unnest", "long", "v0") + .columnTypes(ColumnType.LONG, ColumnType.LONG, ColumnType.LONG) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .context(QUERY_CONTEXT_DEFAULT) + .build() ) ) .expectedResults( @@ -6399,10 +6502,10 @@ public void testCoalesceOnNestedColumns() ) .expectedSignature( RowSignature.builder() - .add("c", ColumnType.LONG) - .add("long", ColumnType.LONG) - .add("col", ColumnType.LONG) - .build() + .add("c", ColumnType.LONG) + .add("long", ColumnType.LONG) + .add("col", ColumnType.LONG) + .build() ) .run(); }