diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.py b/python/pyspark/sql/connect/proto/expressions_pb2.py index 351bc65d30bc0..fe83e78ce6571 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.py +++ b/python/pyspark/sql/connect/proto/expressions_pb2.py @@ -40,7 +40,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\xfe\x35\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 \x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12T\n\x13subquery_expression\x18\x15 \x01(\x0b\x32!.spark.connect.SubqueryExpressionH\x00R\x12subqueryExpression\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\xcc\x10\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x12\x61\n\x11specialized_array\x18\x19 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.SpecializedArrayH\x00R\x10specializedArray\x12<\n\x04time\x18\x1a \x01(\x0b\x32&.spark.connect.Expression.Literal.TimeH\x00R\x04time\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xc0\x02\n\x10SpecializedArray\x12,\n\x05\x62ools\x18\x01 \x01(\x0b\x32\x14.spark.connect.BoolsH\x00R\x05\x62ools\x12)\n\x04ints\x18\x02 \x01(\x0b\x32\x13.spark.connect.IntsH\x00R\x04ints\x12,\n\x05longs\x18\x03 \x01(\x0b\x32\x14.spark.connect.LongsH\x00R\x05longs\x12/\n\x06\x66loats\x18\x04 \x01(\x0b\x32\x15.spark.connect.FloatsH\x00R\x06\x66loats\x12\x32\n\x07\x64oubles\x18\x05 \x01(\x0b\x32\x16.spark.connect.DoublesH\x00R\x07\x64oubles\x12\x32\n\x07strings\x18\x06 \x01(\x0b\x32\x16.spark.connect.StringsH\x00R\x07stringsB\x0c\n\nvalue_type\x1aK\n\x04Time\x12\x12\n\x04nano\x18\x01 \x01(\x03R\x04nano\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x42\x0c\n\n_precisionB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\x82\x02\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x12$\n\x0bis_internal\x18\x05 \x01(\x08H\x00R\nisInternal\x88\x01\x01\x42\x0e\n\x0c_is_internal\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\x8d\x03\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdf\x12\x1f\n\x0bis_distinct\x18\x07 \x01(\x08R\nisDistinctB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_condition"\xc5\x05\n\x12SubqueryExpression\x12\x17\n\x07plan_id\x18\x01 \x01(\x03R\x06planId\x12S\n\rsubquery_type\x18\x02 \x01(\x0e\x32..spark.connect.SubqueryExpression.SubqueryTypeR\x0csubqueryType\x12\x62\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.spark.connect.SubqueryExpression.TableArgOptionsH\x00R\x0ftableArgOptions\x88\x01\x01\x12G\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x10inSubqueryValues\x1a\xea\x01\n\x0fTableArgOptions\x12@\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12\x37\n\x15with_single_partition\x18\x03 \x01(\x08H\x00R\x13withSinglePartition\x88\x01\x01\x42\x18\n\x16_with_single_partition"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_optionsB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\xcc\x36\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 \x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12T\n\x13subquery_expression\x18\x15 \x01(\x0b\x32!.spark.connect.SubqueryExpressionH\x00R\x12subqueryExpression\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9a\x11\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x12\x61\n\x11specialized_array\x18\x19 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.SpecializedArrayH\x00R\x10specializedArray\x12<\n\x04time\x18\x1a \x01(\x0b\x32&.spark.connect.Expression.Literal.TimeH\x00R\x04time\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\xcf\x01\n\x06Struct\x12<\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeB\x02\x18\x01R\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x12H\n\x10\x64\x61ta_type_struct\x18\x03 \x01(\x0b\x32\x1e.spark.connect.DataType.StructR\x0e\x64\x61taTypeStruct\x1a\xc0\x02\n\x10SpecializedArray\x12,\n\x05\x62ools\x18\x01 \x01(\x0b\x32\x14.spark.connect.BoolsH\x00R\x05\x62ools\x12)\n\x04ints\x18\x02 \x01(\x0b\x32\x13.spark.connect.IntsH\x00R\x04ints\x12,\n\x05longs\x18\x03 \x01(\x0b\x32\x14.spark.connect.LongsH\x00R\x05longs\x12/\n\x06\x66loats\x18\x04 \x01(\x0b\x32\x15.spark.connect.FloatsH\x00R\x06\x66loats\x12\x32\n\x07\x64oubles\x18\x05 \x01(\x0b\x32\x16.spark.connect.DoublesH\x00R\x07\x64oubles\x12\x32\n\x07strings\x18\x06 \x01(\x0b\x32\x16.spark.connect.StringsH\x00R\x07stringsB\x0c\n\nvalue_type\x1aK\n\x04Time\x12\x12\n\x04nano\x18\x01 \x01(\x03R\x04nano\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x42\x0c\n\n_precisionB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\x82\x02\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x12$\n\x0bis_internal\x18\x05 \x01(\x08H\x00R\nisInternal\x88\x01\x01\x42\x0e\n\x0c_is_internal\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\x8d\x03\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdf\x12\x1f\n\x0bis_distinct\x18\x07 \x01(\x08R\nisDistinctB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_condition"\xc5\x05\n\x12SubqueryExpression\x12\x17\n\x07plan_id\x18\x01 \x01(\x03R\x06planId\x12S\n\rsubquery_type\x18\x02 \x01(\x0e\x32..spark.connect.SubqueryExpression.SubqueryTypeR\x0csubqueryType\x12\x62\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.spark.connect.SubqueryExpression.TableArgOptionsH\x00R\x0ftableArgOptions\x88\x01\x01\x12G\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x10inSubqueryValues\x1a\xea\x01\n\x0fTableArgOptions\x12@\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12\x37\n\x15with_single_partition\x18\x03 \x01(\x08H\x00R\x13withSinglePartition\x88\x01\x01\x42\x18\n\x16_with_single_partition"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_optionsB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _globals = globals() @@ -53,8 +53,12 @@ _globals[ "DESCRIPTOR" ]._serialized_options = b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" + _globals["_EXPRESSION_LITERAL_STRUCT"].fields_by_name["struct_type"]._loaded_options = None + _globals["_EXPRESSION_LITERAL_STRUCT"].fields_by_name[ + "struct_type" + ]._serialized_options = b"\030\001" _globals["_EXPRESSION"]._serialized_start = 133 - _globals["_EXPRESSION"]._serialized_end = 7043 + _globals["_EXPRESSION"]._serialized_end = 7121 _globals["_EXPRESSION_WINDOW"]._serialized_start = 1986 _globals["_EXPRESSION_WINDOW"]._serialized_end = 2769 _globals["_EXPRESSION_WINDOW_WINDOWFRAME"]._serialized_start = 2276 @@ -74,7 +78,7 @@ _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_start = 3401 _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_end = 3499 _globals["_EXPRESSION_LITERAL"]._serialized_start = 3518 - _globals["_EXPRESSION_LITERAL"]._serialized_end = 5642 + _globals["_EXPRESSION_LITERAL"]._serialized_end = 5720 _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_start = 4514 _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_end = 4631 _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_start = 4633 @@ -84,57 +88,57 @@ _globals["_EXPRESSION_LITERAL_MAP"]._serialized_start = 4867 _globals["_EXPRESSION_LITERAL_MAP"]._serialized_end = 5094 _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_start = 5097 - _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_end = 5226 - _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_start = 5229 - _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_end = 5549 - _globals["_EXPRESSION_LITERAL_TIME"]._serialized_start = 5551 - _globals["_EXPRESSION_LITERAL_TIME"]._serialized_end = 5626 - _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_start = 5645 - _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_end = 5831 - _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_start = 5834 - _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_end = 6092 - _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_start = 6094 - _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_end = 6144 - _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_start = 6146 - _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_end = 6270 - _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_start = 6272 - _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_end = 6358 - _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_start = 6361 - _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_end = 6493 - _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_start = 6496 - _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_end = 6683 - _globals["_EXPRESSION_ALIAS"]._serialized_start = 6685 - _globals["_EXPRESSION_ALIAS"]._serialized_end = 6805 - _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_start = 6808 - _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_end = 6966 - _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_start = 6968 - _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_end = 7030 - _globals["_EXPRESSIONCOMMON"]._serialized_start = 7045 - _globals["_EXPRESSIONCOMMON"]._serialized_end = 7110 - _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_start = 7113 - _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_end = 7510 - _globals["_PYTHONUDF"]._serialized_start = 7513 - _globals["_PYTHONUDF"]._serialized_end = 7717 - _globals["_SCALARSCALAUDF"]._serialized_start = 7720 - _globals["_SCALARSCALAUDF"]._serialized_end = 7934 - _globals["_JAVAUDF"]._serialized_start = 7937 - _globals["_JAVAUDF"]._serialized_end = 8086 - _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_start = 8088 - _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_end = 8187 - _globals["_CALLFUNCTION"]._serialized_start = 8189 - _globals["_CALLFUNCTION"]._serialized_end = 8297 - _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_start = 8299 - _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_end = 8391 - _globals["_MERGEACTION"]._serialized_start = 8394 - _globals["_MERGEACTION"]._serialized_end = 8906 - _globals["_MERGEACTION_ASSIGNMENT"]._serialized_start = 8616 - _globals["_MERGEACTION_ASSIGNMENT"]._serialized_end = 8722 - _globals["_MERGEACTION_ACTIONTYPE"]._serialized_start = 8725 - _globals["_MERGEACTION_ACTIONTYPE"]._serialized_end = 8892 - _globals["_SUBQUERYEXPRESSION"]._serialized_start = 8909 - _globals["_SUBQUERYEXPRESSION"]._serialized_end = 9618 - _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_start = 9215 - _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_end = 9449 - _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_start = 9452 - _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_end = 9596 + _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_end = 5304 + _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_start = 5307 + _globals["_EXPRESSION_LITERAL_SPECIALIZEDARRAY"]._serialized_end = 5627 + _globals["_EXPRESSION_LITERAL_TIME"]._serialized_start = 5629 + _globals["_EXPRESSION_LITERAL_TIME"]._serialized_end = 5704 + _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_start = 5723 + _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_end = 5909 + _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_start = 5912 + _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_end = 6170 + _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_start = 6172 + _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_end = 6222 + _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_start = 6224 + _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_end = 6348 + _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_start = 6350 + _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_end = 6436 + _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_start = 6439 + _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_end = 6571 + _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_start = 6574 + _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_end = 6761 + _globals["_EXPRESSION_ALIAS"]._serialized_start = 6763 + _globals["_EXPRESSION_ALIAS"]._serialized_end = 6883 + _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_start = 6886 + _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_end = 7044 + _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_start = 7046 + _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_end = 7108 + _globals["_EXPRESSIONCOMMON"]._serialized_start = 7123 + _globals["_EXPRESSIONCOMMON"]._serialized_end = 7188 + _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_start = 7191 + _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_end = 7588 + _globals["_PYTHONUDF"]._serialized_start = 7591 + _globals["_PYTHONUDF"]._serialized_end = 7795 + _globals["_SCALARSCALAUDF"]._serialized_start = 7798 + _globals["_SCALARSCALAUDF"]._serialized_end = 8012 + _globals["_JAVAUDF"]._serialized_start = 8015 + _globals["_JAVAUDF"]._serialized_end = 8164 + _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_start = 8166 + _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_end = 8265 + _globals["_CALLFUNCTION"]._serialized_start = 8267 + _globals["_CALLFUNCTION"]._serialized_end = 8375 + _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_start = 8377 + _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_end = 8469 + _globals["_MERGEACTION"]._serialized_start = 8472 + _globals["_MERGEACTION"]._serialized_end = 8984 + _globals["_MERGEACTION_ASSIGNMENT"]._serialized_start = 8694 + _globals["_MERGEACTION_ASSIGNMENT"]._serialized_end = 8800 + _globals["_MERGEACTION_ACTIONTYPE"]._serialized_start = 8803 + _globals["_MERGEACTION_ACTIONTYPE"]._serialized_end = 8970 + _globals["_SUBQUERYEXPRESSION"]._serialized_start = 8987 + _globals["_SUBQUERYEXPRESSION"]._serialized_end = 9696 + _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_start = 9293 + _globals["_SUBQUERYEXPRESSION_TABLEARGOPTIONS"]._serialized_end = 9527 + _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_start = 9530 + _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_end = 9674 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.pyi b/python/pyspark/sql/connect/proto/expressions_pb2.pyi index 9376f5aac5c58..ad347fd4bd154 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.pyi +++ b/python/pyspark/sql/connect/proto/expressions_pb2.pyi @@ -554,27 +554,51 @@ class Expression(google.protobuf.message.Message): STRUCT_TYPE_FIELD_NUMBER: builtins.int ELEMENTS_FIELD_NUMBER: builtins.int + DATA_TYPE_STRUCT_FIELD_NUMBER: builtins.int @property - def struct_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: ... + def struct_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: + """(Deprecated) The type of the struct. + + This field is deprecated since Spark 4.1+ because using DataType as the type of a struct + is ambiguous. This field should only be set if the data_type_struct field is not set. + Use data_type_struct field instead. + """ @property def elements( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___Expression.Literal - ]: ... + ]: + """(Required) The literal values that make up the struct elements.""" + @property + def data_type_struct(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Struct: + """The type of the struct. + + Whether data_type_struct.fields.data_type should be set depends on + whether each field's type can be inferred from the elements field. + """ def __init__( self, *, struct_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., elements: collections.abc.Iterable[global___Expression.Literal] | None = ..., + data_type_struct: pyspark.sql.connect.proto.types_pb2.DataType.Struct | None = ..., ) -> None: ... def HasField( - self, field_name: typing_extensions.Literal["struct_type", b"struct_type"] + self, + field_name: typing_extensions.Literal[ + "data_type_struct", b"data_type_struct", "struct_type", b"struct_type" + ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ - "elements", b"elements", "struct_type", b"struct_type" + "data_type_struct", + b"data_type_struct", + "elements", + b"elements", + "struct_type", + b"struct_type", ], ) -> None: ... diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ColumnNodeToProtoConverterSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ColumnNodeToProtoConverterSuite.scala index 02f0c35c44a8f..90da125b49ff0 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ColumnNodeToProtoConverterSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ColumnNodeToProtoConverterSuite.scala @@ -79,15 +79,24 @@ class ColumnNodeToProtoConverterSuite extends ConnectFunSuite { Literal((12.0, "north", 60.0, "west"), Option(dataType)), expr { b => val builder = b.getLiteralBuilder.getStructBuilder - builder.getStructTypeBuilder.getStructBuilder - .addFields(structField("_1", ProtoDataTypes.DoubleType)) - .addFields(structField("_2", stringTypeWithCollation)) - .addFields(structField("_3", ProtoDataTypes.DoubleType)) - .addFields(structField("_4", stringTypeWithCollation)) - builder.addElements(proto.Expression.Literal.newBuilder().setDouble(12.0)) - builder.addElements(proto.Expression.Literal.newBuilder().setString("north")) - builder.addElements(proto.Expression.Literal.newBuilder().setDouble(60.0)) - builder.addElements(proto.Expression.Literal.newBuilder().setString("west")) + builder + .addElements(proto.Expression.Literal.newBuilder().setDouble(12.0).build()) + builder + .addElements(proto.Expression.Literal.newBuilder().setString("north").build()) + builder + .addElements(proto.Expression.Literal.newBuilder().setDouble(60.0).build()) + builder + .addElements(proto.Expression.Literal.newBuilder().setString("west").build()) + builder.setDataTypeStruct( + proto.DataType.Struct + .newBuilder() + .addFields( + proto.DataType.StructField.newBuilder().setName("_1").setNullable(true).build()) + .addFields(structField("_2", stringTypeWithCollation)) + .addFields( + proto.DataType.StructField.newBuilder().setName("_3").setNullable(true).build()) + .addFields(structField("_4", stringTypeWithCollation)) + .build()) }) } diff --git a/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto b/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto index 78fa0041e1922..3ae6cb8dba9b5 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto @@ -227,8 +227,21 @@ message Expression { } message Struct { - DataType struct_type = 1; + // (Deprecated) The type of the struct. + // + // This field is deprecated since Spark 4.1+ because using DataType as the type of a struct + // is ambiguous. This field should only be set if the data_type_struct field is not set. + // Use data_type_struct field instead. + DataType struct_type = 1 [deprecated = true]; + + // (Required) The literal values that make up the struct elements. repeated Literal elements = 2; + + // The type of the struct. + // + // Whether data_type_struct.fields.data_type should be set depends on + // whether each field's type can be inferred from the elements field. + DataType.Struct data_type_struct = 3; } message SpecializedArray { diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala index 31bbfe08c8f88..4567cc10c81c8 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala @@ -149,17 +149,36 @@ object LiteralValueProtoConverter { } def structBuilder(scalaValue: Any, structType: StructType) = { - val sb = builder.getStructBuilder.setStructType(toConnectProtoType(structType)) - val dataTypes = structType.fields.map(_.dataType) + val sb = builder.getStructBuilder + val fields = structType.fields scalaValue match { case p: Product => val iter = p.productIterator + val dataTypeStruct = proto.DataType.Struct.newBuilder() var idx = 0 while (idx < structType.size) { - sb.addElements(toLiteralProto(iter.next(), dataTypes(idx))) + val field = fields(idx) + val literalProto = toLiteralProto(iter.next(), field.dataType) + sb.addElements(literalProto) + + val fieldBuilder = dataTypeStruct + .addFieldsBuilder() + .setName(field.name) + .setNullable(field.nullable) + + if (LiteralValueProtoConverter.getInferredDataType(literalProto).isEmpty) { + fieldBuilder.setDataType(toConnectProtoType(field.dataType)) + } + + // Set metadata if available + if (field.metadata != Metadata.empty) { + fieldBuilder.setMetadata(field.metadata.json) + } + idx += 1 } + sb.setDataTypeStruct(dataTypeStruct.build()) case other => throw new IllegalArgumentException(s"literal $other not supported (yet).") } @@ -300,54 +319,101 @@ object LiteralValueProtoConverter { case proto.Expression.Literal.LiteralTypeCase.ARRAY => toCatalystArray(literal.getArray) + case proto.Expression.Literal.LiteralTypeCase.STRUCT => + toCatalystStruct(literal.getStruct)._1 + case other => throw new UnsupportedOperationException( s"Unsupported Literal Type: ${other.getNumber} (${other.name})") } } - private def getConverter(dataType: proto.DataType): proto.Expression.Literal => Any = { - if (dataType.hasShort) { v => - v.getShort.toShort - } else if (dataType.hasInteger) { v => - v.getInteger - } else if (dataType.hasLong) { v => - v.getLong - } else if (dataType.hasDouble) { v => - v.getDouble - } else if (dataType.hasByte) { v => - v.getByte.toByte - } else if (dataType.hasFloat) { v => - v.getFloat - } else if (dataType.hasBoolean) { v => - v.getBoolean - } else if (dataType.hasString) { v => - v.getString - } else if (dataType.hasBinary) { v => - v.getBinary.toByteArray - } else if (dataType.hasDate) { v => - v.getDate - } else if (dataType.hasTimestamp) { v => - v.getTimestamp - } else if (dataType.hasTimestampNtz) { v => - v.getTimestampNtz - } else if (dataType.hasDayTimeInterval) { v => - v.getDayTimeInterval - } else if (dataType.hasYearMonthInterval) { v => - v.getYearMonthInterval - } else if (dataType.hasDecimal) { v => - Decimal(v.getDecimal.getValue) - } else if (dataType.hasCalendarInterval) { v => - val interval = v.getCalendarInterval - new CalendarInterval(interval.getMonths, interval.getDays, interval.getMicroseconds) - } else if (dataType.hasArray) { v => - toCatalystArray(v.getArray) - } else if (dataType.hasMap) { v => - toCatalystMap(v.getMap) - } else if (dataType.hasStruct) { v => - toCatalystStruct(v.getStruct) - } else { - throw InvalidPlanInput(s"Unsupported Literal Type: $dataType)") + private def getConverter( + dataType: proto.DataType, + inferDataType: Boolean = false): proto.Expression.Literal => Any = { + dataType.getKindCase match { + case proto.DataType.KindCase.SHORT => v => v.getShort.toShort + case proto.DataType.KindCase.INTEGER => v => v.getInteger + case proto.DataType.KindCase.LONG => v => v.getLong + case proto.DataType.KindCase.DOUBLE => v => v.getDouble + case proto.DataType.KindCase.BYTE => v => v.getByte.toByte + case proto.DataType.KindCase.FLOAT => v => v.getFloat + case proto.DataType.KindCase.BOOLEAN => v => v.getBoolean + case proto.DataType.KindCase.STRING => v => v.getString + case proto.DataType.KindCase.BINARY => v => v.getBinary.toByteArray + case proto.DataType.KindCase.DATE => v => v.getDate + case proto.DataType.KindCase.TIMESTAMP => v => v.getTimestamp + case proto.DataType.KindCase.TIMESTAMP_NTZ => v => v.getTimestampNtz + case proto.DataType.KindCase.DAY_TIME_INTERVAL => v => v.getDayTimeInterval + case proto.DataType.KindCase.YEAR_MONTH_INTERVAL => v => v.getYearMonthInterval + case proto.DataType.KindCase.DECIMAL => v => Decimal(v.getDecimal.getValue) + case proto.DataType.KindCase.CALENDAR_INTERVAL => + v => + val interval = v.getCalendarInterval + new CalendarInterval(interval.getMonths, interval.getDays, interval.getMicroseconds) + case proto.DataType.KindCase.ARRAY => v => toCatalystArray(v.getArray) + case proto.DataType.KindCase.MAP => v => toCatalystMap(v.getMap) + case proto.DataType.KindCase.STRUCT => + if (inferDataType) { v => + val (struct, structType) = toCatalystStruct(v.getStruct, None) + LiteralValueWithDataType( + struct, + proto.DataType.newBuilder.setStruct(structType).build()) + } else { v => + toCatalystStruct(v.getStruct, Some(dataType.getStruct))._1 + } + case _ => + throw InvalidPlanInput(s"Unsupported Literal Type: $dataType)") + } + } + + private def getInferredDataType(literal: proto.Expression.Literal): Option[proto.DataType] = { + if (literal.hasNull) { + return Some(literal.getNull) + } + + val builder = proto.DataType.newBuilder() + literal.getLiteralTypeCase match { + case proto.Expression.Literal.LiteralTypeCase.BINARY => + builder.setBinary(proto.DataType.Binary.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.BOOLEAN => + builder.setBoolean(proto.DataType.Boolean.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.BYTE => + builder.setByte(proto.DataType.Byte.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.SHORT => + builder.setShort(proto.DataType.Short.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.INTEGER => + builder.setInteger(proto.DataType.Integer.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.LONG => + builder.setLong(proto.DataType.Long.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.FLOAT => + builder.setFloat(proto.DataType.Float.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.DOUBLE => + builder.setDouble(proto.DataType.Double.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.DATE => + builder.setDate(proto.DataType.Date.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP => + builder.setTimestamp(proto.DataType.Timestamp.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.TIMESTAMP_NTZ => + builder.setTimestampNtz(proto.DataType.TimestampNTZ.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.CALENDAR_INTERVAL => + builder.setCalendarInterval(proto.DataType.CalendarInterval.newBuilder.build()) + case proto.Expression.Literal.LiteralTypeCase.STRUCT => + // The type of the fields will be inferred from the literals of the fields in the struct. + builder.setStruct(literal.getStruct.getStructType.getStruct) + case _ => + // Not all data types support inferring the data type from the literal at the moment. + // e.g. the type of DayTimeInterval contains extra information like start_field and + // end_field and cannot be inferred from the literal. + return None + } + Some(builder.build()) + } + + private def getInferredDataTypeOrThrow(literal: proto.Expression.Literal): proto.DataType = { + getInferredDataType(literal).getOrElse { + throw InvalidPlanInput( + s"Unsupported Literal type for data type inference: ${literal.getLiteralTypeCase}") } } @@ -386,7 +452,9 @@ object LiteralValueProtoConverter { makeMapData(getConverter(map.getKeyType), getConverter(map.getValueType)) } - def toCatalystStruct(struct: proto.Expression.Literal.Struct): Any = { + def toCatalystStruct( + struct: proto.Expression.Literal.Struct, + structTypeOpt: Option[proto.DataType.Struct] = None): (Any, proto.DataType.Struct) = { def toTuple[A <: Object](data: Seq[A]): Product = { try { val tupleClass = SparkClassUtils.classForName(s"scala.Tuple${data.length}") @@ -397,16 +465,78 @@ object LiteralValueProtoConverter { } } - val elements = struct.getElementsList.asScala - val dataTypes = struct.getStructType.getStruct.getFieldsList.asScala.map(_.getDataType) - val structData = elements - .zip(dataTypes) - .map { case (element, dataType) => - getConverter(dataType)(element) + if (struct.hasDataTypeStruct) { + // The new way to define and convert structs. + val (structData, structType) = if (structTypeOpt.isDefined) { + val structFields = structTypeOpt.get.getFieldsList.asScala + val structData = + struct.getElementsList.asScala.zip(structFields).map { case (element, structField) => + getConverter(structField.getDataType)(element) + } + (structData, structTypeOpt.get) + } else { + def protoStructField( + name: String, + dataType: proto.DataType, + nullable: Boolean, + metadata: Option[String]): proto.DataType.StructField = { + val builder = proto.DataType.StructField + .newBuilder() + .setName(name) + .setDataType(dataType) + .setNullable(nullable) + metadata.foreach(builder.setMetadata) + builder.build() + } + + val dataTypeFields = struct.getDataTypeStruct.getFieldsList.asScala + + val structDataAndFields = struct.getElementsList.asScala.zip(dataTypeFields).map { + case (element, dataTypeField) => + if (dataTypeField.hasDataType) { + (getConverter(dataTypeField.getDataType)(element), dataTypeField) + } else { + val outerDataType = getInferredDataTypeOrThrow(element) + val (value, dataType) = + getConverter(outerDataType, inferDataType = true)(element) match { + case LiteralValueWithDataType(value, dataType) => (value, dataType) + case value => (value, outerDataType) + } + ( + value, + protoStructField( + dataTypeField.getName, + dataType, + dataTypeField.getNullable, + if (dataTypeField.hasMetadata) Some(dataTypeField.getMetadata) else None)) + } + } + + val structType = proto.DataType.Struct + .newBuilder() + .addAllFields(structDataAndFields.map(_._2).asJava) + .build() + + (structDataAndFields.map(_._1), structType) } - .asInstanceOf[scala.collection.Seq[Object]] - .toSeq + (toTuple(structData.toSeq.asInstanceOf[Seq[Object]]), structType) + } else if (struct.hasStructType) { + // For backward compatibility, we still support the old way to define and convert structs. + val elements = struct.getElementsList.asScala + val dataTypes = struct.getStructType.getStruct.getFieldsList.asScala.map(_.getDataType) + val structData = elements + .zip(dataTypes) + .map { case (element, dataType) => + getConverter(dataType)(element) + } + .asInstanceOf[scala.collection.Seq[Object]] + .toSeq - toTuple(structData) + (toTuple(structData), struct.getStructType.getStruct) + } else { + throw InvalidPlanInput("Data type information is missing in the struct literal.") + } } + + private case class LiteralValueWithDataType(value: Any, dataType: proto.DataType) } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json index fa1352557a583..bd9d6bb3c8bb7 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json @@ -807,38 +807,28 @@ }, { "literal": { "struct": { - "structType": { - "struct": { - "fields": [{ - "name": "_1", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }, { - "name": "_2", - "dataType": { - "integer": { - } - } - }, { - "name": "_3", - "dataType": { - "double": { - } - } - }] - } - }, "elements": [{ "string": "a" }, { "integer": 2 }, { "double": 1.0 - }] + }], + "dataTypeStruct": { + "fields": [{ + "name": "_1", + "dataType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "nullable": true + }, { + "name": "_2" + }, { + "name": "_3" + }] + } } }, "common": { @@ -1295,71 +1285,6 @@ }, { "literal": { "struct": { - "structType": { - "struct": { - "fields": [{ - "name": "_1", - "dataType": { - "array": { - "elementType": { - "integer": { - } - } - } - }, - "nullable": true - }, { - "name": "_2", - "dataType": { - "map": { - "keyType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueType": { - "integer": { - } - } - } - }, - "nullable": true - }, { - "name": "_3", - "dataType": { - "struct": { - "fields": [{ - "name": "_1", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }, { - "name": "_2", - "dataType": { - "map": { - "keyType": { - "integer": { - } - }, - "valueType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueContainsNull": true - } - }, - "nullable": true - }] - } - }, - "nullable": true - }] - } - }, "elements": [{ "array": { "elementType": { @@ -1398,36 +1323,6 @@ } }, { "struct": { - "structType": { - "struct": { - "fields": [{ - "name": "_1", - "dataType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "nullable": true - }, { - "name": "_2", - "dataType": { - "map": { - "keyType": { - "integer": { - } - }, - "valueType": { - "string": { - "collation": "UTF8_BINARY" - } - }, - "valueContainsNull": true - } - }, - "nullable": true - }] - } - }, "elements": [{ "string": "a" }, { @@ -1452,9 +1347,70 @@ "string": "b" }] } - }] + }], + "dataTypeStruct": { + "fields": [{ + "name": "_1", + "dataType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "nullable": true + }, { + "name": "_2", + "dataType": { + "map": { + "keyType": { + "integer": { + } + }, + "valueType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueContainsNull": true + } + }, + "nullable": true + }] + } } - }] + }], + "dataTypeStruct": { + "fields": [{ + "name": "_1", + "dataType": { + "array": { + "elementType": { + "integer": { + } + } + } + }, + "nullable": true + }, { + "name": "_2", + "dataType": { + "map": { + "keyType": { + "string": { + "collation": "UTF8_BINARY" + } + }, + "valueType": { + "integer": { + } + } + } + }, + "nullable": true + }, { + "name": "_3", + "nullable": true + }] + } } }, "common": { diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin index dca6c588cb26e..da3a4a946d210 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin differ diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverter.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverter.scala index ab7b56a9b74c4..10f046a57da92 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverter.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverter.scala @@ -117,8 +117,9 @@ object LiteralExpressionProtoConverter { DataTypeProtoConverter.toCatalystType(lit.getMap.getValueType))) case proto.Expression.Literal.LiteralTypeCase.STRUCT => - val dataType = DataTypeProtoConverter.toCatalystType(lit.getStruct.getStructType) - val structData = LiteralValueProtoConverter.toCatalystStruct(lit.getStruct) + val (structData, structType) = LiteralValueProtoConverter.toCatalystStruct(lit.getStruct) + val dataType = DataTypeProtoConverter.toCatalystType( + proto.DataType.newBuilder.setStruct(structType).build()) val convert = CatalystTypeConverters.createToCatalystConverter(dataType) expressions.Literal(convert(structData), dataType) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala index 79ef8decb310a..559984e47cf8b 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/LiteralExpressionProtoConverterSuite.scala @@ -19,7 +19,9 @@ package org.apache.spark.sql.connect.planner import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite +import org.apache.spark.connect.proto import org.apache.spark.sql.connect.common.LiteralValueProtoConverter +import org.apache.spark.sql.types._ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:ignore funsuite @@ -30,4 +32,135 @@ class LiteralExpressionProtoConverterSuite extends AnyFunSuite { // scalastyle:i LiteralValueProtoConverter.toCatalystValue(LiteralValueProtoConverter.toLiteralProto(v))) } } + + Seq( + ( + (1, "string", true), + StructType( + Seq( + StructField("a", IntegerType), + StructField("b", StringType), + StructField("c", BooleanType)))), + ( + Array((1, "string", true), (2, "string", false), (3, "string", true)), + ArrayType( + StructType( + Seq( + StructField("a", IntegerType), + StructField("b", StringType), + StructField("c", BooleanType))))), + ( + (1, (2, 3)), + StructType( + Seq( + StructField("a", IntegerType), + StructField( + "b", + StructType( + Seq(StructField("c", IntegerType), StructField("d", IntegerType)))))))).zipWithIndex + .foreach { case ((v, t), idx) => + test(s"complex proto value and catalyst value conversion #$idx") { + assertResult(v)( + LiteralValueProtoConverter.toCatalystValue( + LiteralValueProtoConverter.toLiteralProto(v, t))) + } + } + + test("backward compatibility for struct literal proto") { + // Test the old way of defining structs with structType field and elements + val structTypeProto = proto.DataType.Struct + .newBuilder() + .addFields( + proto.DataType.StructField + .newBuilder() + .setName("a") + .setDataType(proto.DataType + .newBuilder() + .setInteger(proto.DataType.Integer.newBuilder()) + .build()) + .setNullable(true) + .build()) + .addFields( + proto.DataType.StructField + .newBuilder() + .setName("b") + .setDataType(proto.DataType + .newBuilder() + .setString(proto.DataType.String.newBuilder()) + .build()) + .setNullable(false) + .build()) + .build() + + val structProto = proto.Expression.Literal.Struct + .newBuilder() + .setStructType(proto.DataType.newBuilder().setStruct(structTypeProto).build()) + .addElements(LiteralValueProtoConverter.toLiteralProto(1)) + .addElements(LiteralValueProtoConverter.toLiteralProto("test")) + .build() + + val (result, resultType) = LiteralValueProtoConverter.toCatalystStruct(structProto) + + // Verify the result is a tuple with correct values + assert(result.isInstanceOf[Product]) + val product = result.asInstanceOf[Product] + assert(product.productArity == 2) + assert(product.productElement(0) == 1) + assert(product.productElement(1) == "test") + + // Verify the returned struct type matches the original + assert(resultType.getFieldsCount == 2) + assert(resultType.getFields(0).getName == "a") + assert(resultType.getFields(0).getDataType.hasInteger) + assert(resultType.getFields(0).getNullable) + assert(resultType.getFields(1).getName == "b") + assert(resultType.getFields(1).getDataType.hasString) + assert(!resultType.getFields(1).getNullable) + } + + test("data types of struct fields are not set for inferable types") { + val literalProto = LiteralValueProtoConverter.toLiteralProto( + (1, 2.0, true, (1, 2)), + StructType( + Seq( + StructField("a", IntegerType), + StructField("b", DoubleType), + StructField("c", BooleanType), + StructField( + "d", + StructType(Seq(StructField("e", IntegerType), StructField("f", IntegerType))))))) + assert(!literalProto.getStruct.getDataTypeStruct.getFieldsList.get(0).hasDataType) + assert(!literalProto.getStruct.getDataTypeStruct.getFieldsList.get(1).hasDataType) + assert(!literalProto.getStruct.getDataTypeStruct.getFieldsList.get(2).hasDataType) + assert(!literalProto.getStruct.getDataTypeStruct.getFieldsList.get(3).hasDataType) + } + + test("data types of struct fields are set for non-inferable types") { + val literalProto = LiteralValueProtoConverter.toLiteralProto( + ("string", Decimal(1)), + StructType(Seq(StructField("a", StringType), StructField("b", DecimalType(10, 2))))) + assert(literalProto.getStruct.getDataTypeStruct.getFieldsList.get(0).hasDataType) + assert(literalProto.getStruct.getDataTypeStruct.getFieldsList.get(1).hasDataType) + } + + test("nullable and metadata fields are set for struct literal proto") { + val literalProto = LiteralValueProtoConverter.toLiteralProto( + ("string", Decimal(1)), + StructType(Seq( + StructField("a", StringType, nullable = true, Metadata.fromJson("""{"key": "value"}""")), + StructField("b", DecimalType(10, 2), nullable = false)))) + val structFields = literalProto.getStruct.getDataTypeStruct.getFieldsList + assert(structFields.get(0).getNullable) + assert(structFields.get(0).hasMetadata) + assert(structFields.get(0).getMetadata == """{"key":"value"}""") + assert(!structFields.get(1).getNullable) + assert(!structFields.get(1).hasMetadata) + + val (_, structTypeProto) = LiteralValueProtoConverter.toCatalystStruct(literalProto.getStruct) + assert(structTypeProto.getFieldsList.get(0).getNullable) + assert(structTypeProto.getFieldsList.get(0).hasMetadata) + assert(structTypeProto.getFieldsList.get(0).getMetadata == """{"key":"value"}""") + assert(!structTypeProto.getFieldsList.get(1).getNullable) + assert(!structTypeProto.getFieldsList.get(1).hasMetadata) + } }