Skip to content

Commit e8daef5

Browse files
committed
Optimize further
1 parent 81af068 commit e8daef5

File tree

10 files changed

+274
-184
lines changed

10 files changed

+274
-184
lines changed

python/pyspark/sql/connect/proto/expressions_pb2.pyi

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -479,23 +479,27 @@ class Expression(google.protobuf.message.Message):
479479
def element_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
480480
"""(Deprecated) The element type of the array.
481481
482-
This field is deprecated since Spark 4.1+ and should only be set
483-
if the data_type field is not set. Use data_type field instead.
482+
This field is deprecated since Spark 4.1. Use the data_type field instead.
484483
"""
485484
@property
486485
def elements(
487486
self,
488487
) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
489488
global___Expression.Literal
490489
]:
491-
"""The literal values that make up the array elements."""
490+
"""The literal values that make up the array elements.
491+
492+
For inferring the data_type.element_type, only the first element needs to
493+
contain the type information.
494+
"""
492495
@property
493496
def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Array:
494-
"""The type of the array.
497+
"""The type of the array. You don't need to set this field if the type information is not needed.
495498
496499
If the element type can be inferred from the first element of the elements field,
497-
then you don't need to set data_type.element_type to save space. On the other hand,
498-
redundant type information is also acceptable.
500+
then you don't need to set data_type.element_type to save space.
501+
502+
On the other hand, redundant type information is also acceptable.
499503
"""
500504
def __init__(
501505
self,
@@ -534,8 +538,7 @@ class Expression(google.protobuf.message.Message):
534538
def key_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
535539
"""(Deprecated) The key type of the map.
536540
537-
This field is deprecated since Spark 4.1+ and should only be set
538-
if the data_type field is not set. Use data_type field instead.
541+
This field is deprecated since Spark 4.1. Use the data_type field instead.
539542
"""
540543
@property
541544
def value_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
@@ -550,20 +553,29 @@ class Expression(google.protobuf.message.Message):
550553
) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
551554
global___Expression.Literal
552555
]:
553-
"""The literal keys that make up the map."""
556+
"""The literal keys that make up the map.
557+
558+
For inferring the data_type.key_type, only the first key needs to
559+
contain the type information.
560+
"""
554561
@property
555562
def values(
556563
self,
557564
) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
558565
global___Expression.Literal
559566
]:
560-
"""The literal values that make up the map."""
567+
"""The literal values that make up the map.
568+
569+
For inferring the data_type.value_type, only the first value needs to
570+
contain the type information.
571+
"""
561572
@property
562573
def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Map:
563-
"""The type of the map.
574+
"""The type of the map. You don't need to set this field if the type information is not needed.
564575
565576
If the key/value types can be inferred from the first element of the keys/values fields,
566577
then you don't need to set data_type.key_type/data_type.value_type to save space.
578+
567579
On the other hand, redundant type information is also acceptable.
568580
"""
569581
def __init__(
@@ -608,8 +620,7 @@ class Expression(google.protobuf.message.Message):
608620
"""(Deprecated) The type of the struct.
609621
610622
This field is deprecated since Spark 4.1+ because using DataType as the type of a struct
611-
is ambiguous. This field should only be set if the data_type_struct field is not set.
612-
Use data_type_struct field instead.
623+
is ambiguous. Use the data_type_struct field instead.
613624
"""
614625
@property
615626
def elements(
@@ -620,7 +631,7 @@ class Expression(google.protobuf.message.Message):
620631
"""(Required) The literal values that make up the struct elements."""
621632
@property
622633
def data_type_struct(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Struct:
623-
"""The type of the struct.
634+
"""The type of the struct. You don't need to set this field if the type information is not needed.
624635
625636
Whether data_type_struct.fields.data_type should be set depends on
626637
whether each field's type can be inferred from the elements field.

sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3419,6 +3419,11 @@ class PlanGenerationTestSuite
34193419
mutable.LinkedHashMap("a" -> 1, "b" -> 2),
34203420
mutable.LinkedHashMap("a" -> 3, "b" -> 4),
34213421
mutable.LinkedHashMap("a" -> 5, "b" -> 6))),
3422+
fn.typedLit(
3423+
Seq(
3424+
mutable.LinkedHashMap("a" -> Seq("1", "2"), "b" -> Seq("3", "4")),
3425+
mutable.LinkedHashMap("a" -> Seq("5", "6"), "b" -> Seq("7", "8")),
3426+
mutable.LinkedHashMap("a" -> Seq.empty[String], "b" -> Seq.empty[String]))),
34223427
fn.typedLit(
34233428
mutable.LinkedHashMap(
34243429
1 -> mutable.LinkedHashMap("a" -> 1, "b" -> 2),

sql/connect/common/src/main/protobuf/spark/connect/expressions.proto

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -217,26 +217,28 @@ message Expression {
217217
message Array {
218218
// (Deprecated) The element type of the array.
219219
//
220-
// This field is deprecated since Spark 4.1+ and should only be set
221-
// if the data_type field is not set. Use data_type field instead.
220+
// This field is deprecated since Spark 4.1. Use the data_type field instead.
222221
DataType element_type = 1 [deprecated = true];
223222

224223
// The literal values that make up the array elements.
224+
//
225+
// For inferring the data_type.element_type, only the first element needs to
226+
// contain the type information.
225227
repeated Literal elements = 2;
226228

227-
// The type of the array.
229+
// The type of the array. You don't need to set this field if the type information is not needed.
228230
//
229231
// If the element type can be inferred from the first element of the elements field,
230-
// then you don't need to set data_type.element_type to save space. On the other hand,
231-
// redundant type information is also acceptable.
232+
// then you don't need to set data_type.element_type to save space.
233+
//
234+
// On the other hand, redundant type information is also acceptable.
232235
DataType.Array data_type = 3;
233236
}
234237

235238
message Map {
236239
// (Deprecated) The key type of the map.
237240
//
238-
// This field is deprecated since Spark 4.1+ and should only be set
239-
// if the data_type field is not set. Use data_type field instead.
241+
// This field is deprecated since Spark 4.1. Use the data_type field instead.
240242
DataType key_type = 1 [deprecated = true];
241243

242244
// (Deprecated) The value type of the map.
@@ -246,15 +248,22 @@ message Expression {
246248
DataType value_type = 2 [deprecated = true];
247249

248250
// The literal keys that make up the map.
251+
//
252+
// For inferring the data_type.key_type, only the first key needs to
253+
// contain the type information.
249254
repeated Literal keys = 3;
250255

251256
// The literal values that make up the map.
257+
//
258+
// For inferring the data_type.value_type, only the first value needs to
259+
// contain the type information.
252260
repeated Literal values = 4;
253261

254-
// The type of the map.
262+
// The type of the map. You don't need to set this field if the type information is not needed.
255263
//
256264
// If the key/value types can be inferred from the first element of the keys/values fields,
257265
// then you don't need to set data_type.key_type/data_type.value_type to save space.
266+
//
258267
// On the other hand, redundant type information is also acceptable.
259268
DataType.Map data_type = 5;
260269
}
@@ -263,14 +272,13 @@ message Expression {
263272
// (Deprecated) The type of the struct.
264273
//
265274
// This field is deprecated since Spark 4.1+ because using DataType as the type of a struct
266-
// is ambiguous. This field should only be set if the data_type_struct field is not set.
267-
// Use data_type_struct field instead.
275+
// is ambiguous. Use the data_type_struct field instead.
268276
DataType struct_type = 1 [deprecated = true];
269277

270278
// (Required) The literal values that make up the struct elements.
271279
repeated Literal elements = 2;
272280

273-
// The type of the struct.
281+
// The type of the struct. You don't need to set this field if the type information is not needed.
274282
//
275283
// Whether data_type_struct.fields.data_type should be set depends on
276284
// whether each field's type can be inferred from the elements field.

0 commit comments

Comments (0)