Skip to content

Commit 4195ffd

Browse files
authored
Benchmark object storage encoding (#18628)
* object-storage * none * check * trigger ci / empty commit * auto
1 parent a72eaa8 commit 4195ffd

File tree

3 files changed

+77
-25
lines changed

3 files changed

+77
-25
lines changed

benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBaseBenchmark.java

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import com.google.inject.Injector;
2828
import com.google.inject.Key;
2929
import com.google.inject.multibindings.MapBinder;
30+
import org.apache.druid.data.input.impl.DimensionsSpec;
3031
import org.apache.druid.frame.FrameType;
3132
import org.apache.druid.frame.read.FrameReader;
3233
import org.apache.druid.frame.segment.FrameSegment;
@@ -61,6 +62,7 @@
6162
import org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketchModule;
6263
import org.apache.druid.query.lookup.LookupExtractor;
6364
import org.apache.druid.query.policy.NoopPolicyEnforcer;
65+
import org.apache.druid.segment.AutoTypeColumnSchema;
6466
import org.apache.druid.segment.IncrementalIndexSegment;
6567
import org.apache.druid.segment.IndexSpec;
6668
import org.apache.druid.segment.PhysicalSegmentInspector;
@@ -74,6 +76,8 @@
7476
import org.apache.druid.segment.generator.SegmentGenerator;
7577
import org.apache.druid.segment.incremental.IncrementalIndex;
7678
import org.apache.druid.segment.join.JoinableFactoryWrapper;
79+
import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec;
80+
import org.apache.druid.segment.nested.ObjectStorageEncoding;
7781
import org.apache.druid.server.QueryStackTests;
7882
import org.apache.druid.server.SpecificSegmentsQuerySegmentWalker;
7983
import org.apache.druid.server.security.AuthConfig;
@@ -100,6 +104,7 @@
100104
import org.apache.druid.sql.calcite.util.testoperator.CalciteTestOperatorModule;
101105
import org.apache.druid.sql.hook.DruidHookDispatcher;
102106
import org.apache.druid.timeline.DataSegment;
107+
import org.apache.druid.timeline.SegmentId;
103108
import org.openjdk.jmh.annotations.Level;
104109
import org.openjdk.jmh.annotations.Param;
105110
import org.openjdk.jmh.annotations.Scope;
@@ -109,6 +114,7 @@
109114

110115
import javax.annotation.Nonnull;
111116
import javax.annotation.Nullable;
117+
import java.util.Collection;
112118
import java.util.Collections;
113119
import java.util.HashMap;
114120
import java.util.HashSet;
@@ -177,6 +183,12 @@ public enum BenchmarkStringEncodingStrategy
177183
})
178184
protected String complexCompression;
179185

186+
@Param({
187+
"NONE",
188+
"SMILE"
189+
})
190+
protected ObjectStorageEncoding jsonObjectStorageEncoding;
191+
180192
@Param({
181193
"explicit",
182194
"auto"
@@ -238,13 +250,14 @@ public void setup() throws JsonProcessingException
238250

239251
Map<DataSegment, IncrementalIndex> realtimeSegments = new HashMap<>();
240252
Map<DataSegment, QueryableIndex> segments = new HashMap<>();
253+
NestedCommonFormatColumnFormatSpec columnFormatSpec = NestedCommonFormatColumnFormatSpec
254+
.builder()
255+
.setObjectStorageEncoding(jsonObjectStorageEncoding)
256+
.build();
241257
for (String dataSource : getDatasources()) {
242-
final SqlBenchmarkDatasets.BenchmarkSchema schema;
243-
if ("auto".equals(schemaType)) {
244-
schema = SqlBenchmarkDatasets.getSchema(dataSource).asAutoDimensions();
245-
} else {
246-
schema = SqlBenchmarkDatasets.getSchema(dataSource);
247-
}
258+
final SqlBenchmarkDatasets.BenchmarkSchema schema =
259+
SqlBenchmarkDatasets.getSchema(dataSource)
260+
.convertDimensions("auto".equals(schemaType), columnFormatSpec);
248261

249262
for (DataSegment dataSegment : schema.getDataSegments()) {
250263
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
@@ -355,6 +368,18 @@ public void setup() throws JsonProcessingException
355368

356369
private void checkIncompatibleParameters()
357370
{
371+
// we only support NONE object storage encoding for auto column with mmap segments
372+
if (ObjectStorageEncoding.NONE.equals(jsonObjectStorageEncoding)) {
373+
boolean hasAutoColumn = "auto".equals(schemaType) || getDatasources().stream()
374+
.map(SqlBenchmarkDatasets::getSchema)
375+
.map(SqlBenchmarkDatasets.BenchmarkSchema::getDimensionsSpec)
376+
.map(DimensionsSpec::getDimensions)
377+
.flatMap(Collection::stream)
378+
.anyMatch(x -> x instanceof AutoTypeColumnSchema);
379+
if (!hasAutoColumn || !BenchmarkStorage.MMAP.equals(storageType)) {
380+
System.exit(0);
381+
}
382+
}
358383
// if running as fork 0, maybe don't use these combinations since it will kill everything
359384
if (stringEncoding != BenchmarkStringEncodingStrategy.UTF8 && storageType != BenchmarkStorage.MMAP) {
360385
System.exit(0);
@@ -364,7 +389,8 @@ private void checkIncompatibleParameters()
364389
System.exit(0);
365390
}
366391
// vectorize only works for mmap and frame column segments, bail out if not one of those
367-
if (vectorizeContext.shouldVectorize(true) && !(storageType == BenchmarkStorage.MMAP || storageType == BenchmarkStorage.FRAME_COLUMNAR)) {
392+
if (vectorizeContext.shouldVectorize(true) && !(storageType == BenchmarkStorage.MMAP
393+
|| storageType == BenchmarkStorage.FRAME_COLUMNAR)) {
368394
System.exit(0);
369395
}
370396
}
@@ -479,6 +505,12 @@ private static void addSegmentToWalker(
479505
FrameReader.create(cursorFactory.getRowSignature())
480506
)
481507
{
508+
@Override
509+
public SegmentId getId()
510+
{
511+
return descriptor.getId();
512+
}
513+
482514
@Nullable
483515
@Override
484516
public <T> T as(@Nonnull Class<T> clazz)
@@ -500,6 +532,12 @@ public <T> T as(@Nonnull Class<T> clazz)
500532
FrameReader.create(cursorFactory.getRowSignature())
501533
)
502534
{
535+
@Override
536+
public SegmentId getId()
537+
{
538+
return descriptor.getId();
539+
}
540+
503541
@Nullable
504542
@Override
505543
public <T> T as(@Nonnull Class<T> clazz)

benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlBenchmarkDatasets.java

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.google.common.collect.ImmutableList;
2323
import com.google.common.collect.Iterables;
2424
import org.apache.druid.data.input.impl.AggregateProjectionSpec;
25+
import org.apache.druid.data.input.impl.DimensionSchema;
2526
import org.apache.druid.data.input.impl.DimensionsSpec;
2627
import org.apache.druid.data.input.impl.LongDimensionSchema;
2728
import org.apache.druid.data.input.impl.StringDimensionSchema;
@@ -37,6 +38,7 @@
3738
import org.apache.druid.segment.AutoTypeColumnSchema;
3839
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
3940
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
41+
import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec;
4042
import org.apache.druid.segment.transform.ExpressionTransform;
4143
import org.apache.druid.segment.transform.TransformSpec;
4244
import org.apache.druid.timeline.DataSegment;
@@ -405,33 +407,43 @@ public List<AggregateProjectionSpec> getProjections()
405407
return projections;
406408
}
407409

408-
public BenchmarkSchema asAutoDimensions()
410+
public BenchmarkSchema convertDimensions(boolean convertToAuto, NestedCommonFormatColumnFormatSpec columnFormatSpec)
409411
{
410412
return new SqlBenchmarkDatasets.BenchmarkSchema(
411413
dataSegments,
412414
generatorSchemaInfo,
413415
transformSpec,
414416
dimensionsSpec.withDimensions(
415-
dimensionsSpec.getDimensions()
416-
.stream()
417-
.map(dim -> AutoTypeColumnSchema.of(dim.getName()))
418-
.collect(Collectors.toList())
417+
dimensionsSpec.getDimensions()
418+
.stream()
419+
.map(dim -> asDimensionSchema(dim, convertToAuto, columnFormatSpec))
420+
.collect(Collectors.toList())
419421
),
420422
aggregators,
421-
projections.stream()
422-
.map(
423-
projection ->
424-
AggregateProjectionSpec.builder(projection)
425-
.groupingColumns(
426-
projection.getGroupingColumns()
427-
.stream()
428-
.map(dim -> AutoTypeColumnSchema.of(dim.getName()))
429-
.collect(Collectors.toList())
430-
)
431-
.build()
432-
).collect(Collectors.toList()),
423+
projections
424+
.stream()
425+
.map(
426+
projection ->
427+
AggregateProjectionSpec
428+
.builder(projection)
429+
.groupingColumns(
430+
projection.getGroupingColumns()
431+
.stream()
432+
.map(dim -> asDimensionSchema(dim, convertToAuto, columnFormatSpec))
433+
.collect(Collectors.toList())
434+
)
435+
.build()
436+
).collect(Collectors.toList()),
433437
queryGranularity
434438
);
435439
}
440+
441+
private static DimensionSchema asDimensionSchema(DimensionSchema dim, boolean convertToAuto, NestedCommonFormatColumnFormatSpec columnFormatSpec)
442+
{
443+
if (convertToAuto || dim instanceof AutoTypeColumnSchema) {
444+
return new AutoTypeColumnSchema(dim.getName(), null, columnFormatSpec);
445+
}
446+
return dim;
447+
}
436448
}
437449
}

benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,9 @@ public class SqlNestedDataBenchmark extends SqlBaseQueryBenchmark
119119
"SELECT long2 FROM druid.nested WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)",
120120
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0)",
121121
"SELECT long2 FROM druid.nested WHERE double3 IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1",
122-
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1"
122+
"SELECT JSON_VALUE(nested, '$.nesteder.long2' RETURNING BIGINT) FROM druid.nested WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) IN (1.0, 19.0, 21.0, 23.0, 25.0, 26.0, 46.0, 50.0, 51.0, 55.0, 60.0, 61.0, 66.0, 68.0, 69.0, 70.0, 77.0, 88.0, 90.0, 92.0, 93.0, 94.0, 95.0, 100.0, 101.0, 102.0, 104.0, 109.0, 111.0, 113.0, 114.0, 115.0, 120.0, 121.0, 122.0, 134.0, 135.0, 136.0, 140.0, 142.0, 150.0, 155.0, 170.0, 172.0, 173.0, 174.0, 180.0, 181.0, 190.0, 199.0, 200.0, 201.0, 202.0, 203.0, 204.0) GROUP BY 1",
123+
// 56
124+
"SELECT TO_JSON_STRING(nested) FROM druid.nested"
123125
);
124126

125127

0 commit comments

Comments
 (0)