Skip to content

Commit d786c7e

Browse files
committed
SQL: Use specialized virtual columns for JSON_OBJECT, JSON_MERGE.
Building on #18521, this patch makes virtual column specialization recursive. Specialization now also happens immediately when getOrCreateVirtualColumnForExpression is called. Specializations are added for JSON_OBJECT and JSON_MERGE, so chains of JSON_MERGE, JSON_OBJECT, and JSON_VALUE can now preserve lazy evaluation, index usage, dictionary usage, etc. This patch also changes the VirtualColumnCreator interface, which can affect extensions that add SQL operators: a "DruidExpression self" parameter is added so that the creator can access rewritten arguments, and the "String expression" parameter is removed because it is no longer needed (the expression string is available through self.getExpression()).
1 parent 34aa655 commit d786c7e

File tree

10 files changed

+493
-160
lines changed

10 files changed

+493
-160
lines changed

processing/src/main/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumn.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,12 @@ public NestedObjectVirtualColumn(
8888
StringUtils.format(
8989
"%s(%s)",
9090
NestedDataExpressions.JsonObjectExprMacro.NAME,
91-
keyExprMap.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(entry -> {
92-
final String key = entry.getKey();
93-
final TypedExpression valueExpr = entry.getValue();
94-
return Parser.constant(key).stringify() + ',' + valueExpr.expression;
95-
}).collect(Collectors.joining(","))
91+
Preconditions.checkNotNull(keyExprMap, "object")
92+
.entrySet().stream().map(entry -> {
93+
final String key = entry.getKey();
94+
final TypedExpression valueExpr = entry.getValue();
95+
return Parser.constant(key).stringify() + ',' + valueExpr.expression;
96+
}).collect(Collectors.joining(","))
9697
),
9798
ColumnType.NESTED_DATA,
9899
macroTable

processing/src/test/java/org/apache/druid/segment/virtual/NestedObjectVirtualColumnTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import org.junit.Assert;
3030
import org.junit.Test;
3131

32-
import java.util.HashMap;
3332
import java.util.Map;
3433

3534
public class NestedObjectVirtualColumnTest
@@ -56,9 +55,10 @@ public void testSerde() throws JsonProcessingException
5655
@Test
5756
public void testGetKeyExprMap()
5857
{
59-
Map<String, NestedObjectVirtualColumn.TypedExpression> keyExprMap = new HashMap<>();
60-
keyExprMap.put("key1", new NestedObjectVirtualColumn.TypedExpression("expr1", ColumnType.STRING));
61-
keyExprMap.put("key2", new NestedObjectVirtualColumn.TypedExpression("expr2", ColumnType.DOUBLE));
58+
Map<String, NestedObjectVirtualColumn.TypedExpression> keyExprMap = ImmutableMap.of(
59+
"key1", new NestedObjectVirtualColumn.TypedExpression("expr1", ColumnType.STRING),
60+
"key2", new NestedObjectVirtualColumn.TypedExpression("expr2", ColumnType.DOUBLE)
61+
);
6262

6363
NestedObjectVirtualColumn column = new NestedObjectVirtualColumn(
6464
"test_obj",

quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.msq.iq

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -666,10 +666,14 @@ FROM test_json_cols;
666666
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
667667
},
668668
"virtualColumns" : [ {
669-
"type" : "expression",
669+
"type" : "nested-object",
670670
"name" : "v0",
671-
"expression" : "json_object('a',\"c1\")",
672-
"outputType" : "COMPLEX<json>"
671+
"object" : {
672+
"a" : {
673+
"expression" : "\"c1\"",
674+
"type" : "COMPLEX<json>"
675+
}
676+
}
673677
} ],
674678
"resultFormat" : "compactedList",
675679
"columns" : [ "v0" ],
@@ -762,10 +766,14 @@ FROM test_json_cols;
762766
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
763767
},
764768
"virtualColumns" : [ {
765-
"type" : "expression",
769+
"type" : "nested-object",
766770
"name" : "v0",
767-
"expression" : "json_object('a',\"c1\")",
768-
"outputType" : "COMPLEX<json>"
771+
"object" : {
772+
"a" : {
773+
"expression" : "\"c1\"",
774+
"type" : "COMPLEX<json>"
775+
}
776+
}
769777
} ],
770778
"resultFormat" : "compactedList",
771779
"columns" : [ "v0" ],

quidem-ut/src/test/quidem/org.apache.druid.quidem.QTest/qaJsonCols/funcs_and_sql_func_json_object.03.std.iq

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -681,10 +681,14 @@ FROM test_json_cols;
681681
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
682682
},
683683
"virtualColumns" : [ {
684-
"type" : "expression",
684+
"type" : "nested-object",
685685
"name" : "v0",
686-
"expression" : "json_object('a',\"c1\")",
687-
"outputType" : "COMPLEX<json>"
686+
"object" : {
687+
"a" : {
688+
"expression" : "\"c1\"",
689+
"type" : "COMPLEX<json>"
690+
}
691+
}
688692
} ],
689693
"resultFormat" : "compactedList",
690694
"columns" : [ "v0" ],
@@ -725,10 +729,14 @@ FROM test_json_cols;
725729
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
726730
},
727731
"virtualColumns" : [ {
728-
"type" : "expression",
732+
"type" : "nested-object",
729733
"name" : "v0",
730-
"expression" : "json_object('a',\"c1\")",
731-
"outputType" : "COMPLEX<json>"
734+
"object" : {
735+
"a" : {
736+
"expression" : "\"c1\"",
737+
"type" : "COMPLEX<json>"
738+
}
739+
}
732740
} ],
733741
"resultFormat" : "compactedList",
734742
"columns" : [ "v0" ],

sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ public boolean isDirectColumnAccess()
369369

370370
public String getDirectColumn()
371371
{
372-
return Preconditions.checkNotNull(simpleExtraction.getColumn());
372+
return Preconditions.checkNotNull(simpleExtraction, "simpleExtraction").getColumn();
373373
}
374374

375375
public boolean isSimpleExtraction()
@@ -401,7 +401,7 @@ public VirtualColumn toVirtualColumn(
401401
final ExpressionParser parser
402402
)
403403
{
404-
return virtualColumnCreator.create(name, outputType, expression.get(), parser);
404+
return virtualColumnCreator.create(name, outputType, parser, this);
405405
}
406406

407407
public VirtualColumn toExpressionVirtualColumn(
@@ -410,14 +410,24 @@ public VirtualColumn toExpressionVirtualColumn(
410410
final ExpressionParser parser
411411
)
412412
{
413-
return DEFAULT_VIRTUAL_COLUMN_BUILDER.create(name, outputType, expression.get(), parser);
413+
return DEFAULT_VIRTUAL_COLUMN_BUILDER.create(name, outputType, parser, this);
414414
}
415415

416416
public NodeType getType()
417417
{
418418
return nodeType;
419419
}
420420

421+
/**
422+
* Returns whether this expression is {@link NodeType#IDENTIFIER} or {@link NodeType#SPECIALIZED}. Useful because
423+
* these are the expressions that can be expected to become direct column references once virtual columns have gone
424+
* through a specialization pass.
425+
*/
426+
public boolean isIdentifierOrSpecialized()
427+
{
428+
return nodeType == NodeType.IDENTIFIER || nodeType == NodeType.SPECIALIZED;
429+
}
430+
421431
/**
422432
* The {@link ColumnType} of this expression as inferred when this expression was created. This is likely the result
423433
* of converting the output of {@link org.apache.calcite.rex.RexNode#getType()} using
@@ -609,15 +619,28 @@ public String compile(List<DruidExpression> arguments)
609619
@FunctionalInterface
610620
public interface VirtualColumnCreator
611621
{
612-
VirtualColumn create(String name, ColumnType outputType, String expression, ExpressionParser parser);
622+
/**
623+
* Create a virtual column for an expression.
624+
*
625+
* @param name name of the virtual column
626+
* @param outputType type of the virtual column
627+
* @param parser expression parser, if needed
628+
* @param self expression, possibly rewritten to refer to specialized virtual columns
629+
*/
630+
VirtualColumn create(
631+
String name,
632+
ColumnType outputType,
633+
ExpressionParser parser,
634+
DruidExpression self
635+
);
613636
}
614637

615638
public static class ExpressionVirtualColumnCreator implements VirtualColumnCreator
616639
{
617640
@Override
618-
public VirtualColumn create(String name, ColumnType outputType, String expression, ExpressionParser parser)
641+
public VirtualColumn create(String name, ColumnType outputType, ExpressionParser parser, DruidExpression self)
619642
{
620-
return new ExpressionVirtualColumn(name, expression, parser.parse(expression), outputType);
643+
return new ExpressionVirtualColumn(name, self.getExpression(), parser.parse(self.getExpression()), outputType);
621644
}
622645
}
623646
}

sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringOperatorConversions.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -389,11 +389,11 @@ public DruidExpression toDruidExpression(
389389
Calcites.getColumnTypeForRelDataType(rexNode.getType()),
390390
builder,
391391
druidExpressions,
392-
(name, outputType, expression, macroTable) -> new ListFilteredVirtualColumn(
392+
(name, outputType, parser, self) -> new ListFilteredVirtualColumn(
393393
name,
394-
druidExpressions.get(0)
395-
.getSimpleExtraction()
396-
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
394+
self.getArguments().get(0)
395+
.getSimpleExtraction()
396+
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
397397
literals,
398398
isAllowList()
399399
)
@@ -470,11 +470,11 @@ public DruidExpression toDruidExpression(
470470
Calcites.getColumnTypeForRelDataType(rexNode.getType()),
471471
builder,
472472
druidExpressions,
473-
(name, outputType, expression, macroTable) -> new RegexFilteredVirtualColumn(
473+
(name, outputType, parser, self) -> new RegexFilteredVirtualColumn(
474474
name,
475-
druidExpressions.get(0)
476-
.getSimpleExtraction()
477-
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
475+
self.getArguments().get(0)
476+
.getSimpleExtraction()
477+
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
478478
pattern
479479
)
480480
);
@@ -555,11 +555,11 @@ public DruidExpression toDruidExpression(
555555
Calcites.getColumnTypeForRelDataType(rexNode.getType()),
556556
builder,
557557
druidExpressions,
558-
(name, outputType, expression, macroTable) -> new PrefixFilteredVirtualColumn(
558+
(name, outputType, parser, self) -> new PrefixFilteredVirtualColumn(
559559
name,
560-
druidExpressions.get(0)
561-
.getSimpleExtraction()
562-
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
560+
self.getArguments().get(0)
561+
.getSimpleExtraction()
562+
.toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
563563
prefix
564564
)
565565
);

0 commit comments

Comments
 (0)