diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py index 3774bcbdbfb0e..9f008b756de22 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.py +++ b/python/pyspark/sql/connect/proto/relations_pb2.py @@ -43,7 +43,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/ml_common.proto"\x9c\x1d\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! 
\x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x37\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00R\x08\x61sOfJoin\x12\x85\x01\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R!commonInlineUserDefinedDataSource\x12\x45\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00R\rwithRelations\x12\x38\n\ttranspose\x18* \x01(\x0b\x32\x18.spark.connect.TransposeH\x00R\ttranspose\x12w\n unresolved_table_valued_function\x18+ \x01(\x0b\x32,.spark.connect.UnresolvedTableValuedFunctionH\x00R\x1dunresolvedTableValuedFunction\x12?\n\x0clateral_join\x18, \x01(\x0b\x32\x1a.spark.connect.LateralJoinH\x00R\x0blateralJoin\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12=\n\x0bml_relation\x18\xac\x02 \x01(\x0b\x32\x19.spark.connect.MlRelationH\x00R\nmlRelation\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\xe4\x03\n\nMlRelation\x12\x43\n\ttransform\x18\x01 \x01(\x0b\x32#.spark.connect.MlRelation.TransformH\x00R\ttransform\x12,\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00R\x05\x66\x65tch\x12P\n\x15model_summary_dataset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationH\x01R\x13modelSummaryDataset\x88\x01\x01\x1a\xeb\x01\n\tTransform\x12\x33\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00R\x06objRef\x12=\n\x0btransformer\x18\x02 \x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00R\x0btransformer\x12-\n\x05input\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06params\x18\x04 
\x01(\x0b\x32\x17.spark.connect.MlParamsR\x06paramsB\n\n\x08operatorB\t\n\x07ml_typeB\x18\n\x16_model_summary_dataset"\xcb\x02\n\x05\x46\x65tch\x12\x31\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefR\x06objRef\x12\x35\n\x07methods\x18\x02 \x03(\x0b\x32\x1b.spark.connect.Fetch.MethodR\x07methods\x1a\xd7\x01\n\x06Method\x12\x16\n\x06method\x18\x01 \x01(\tR\x06method\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32 .spark.connect.Fetch.Method.ArgsR\x04\x61rgs\x1a\x7f\n\x04\x41rgs\x12\x39\n\x05param\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x05param\x12/\n\x05input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x05inputB\x0b\n\targs_type"\t\n\x07Unknown"\x8e\x01\n\x0eRelationCommon\x12#\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01R\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12-\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginR\x06originB\n\n\x08_plan_id"\xde\x03\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12O\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntryR\x0enamedArguments\x12>\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cposArguments\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"u\n\rWithRelations\x12+\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04root\x12\x37\n\nreferences\x18\x02 \x03(\x0b\x32\x17.spark.connect.RelationR\nreferences"\x97\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x95\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xfe\x05\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x12J\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSetsR\x0cgroupingSets\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1aL\n\x0cGroupingSets\x12<\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0bgroupingSet"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 
\x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 \x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xfe\x02\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12i\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01R\x10renameColumnsMap\x12\x42\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.RenameR\x07renames\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x45\n\x06Rename\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12 \n\x0cnew_col_name\x18\x02 \x01(\tR\nnewColName"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"z\n\tTranspose\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12>\n\rindex_columns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cindexColumns"}\n\x1dUnresolvedTableValuedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xe8\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x12"\n\nprofile_id\x18\x04 \x01(\x05H\x01R\tprofileId\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id"\xd2\x06\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x12?\n\x0cstate_schema\x18\n \x01(\x0b\x32\x17.spark.connect.DataTypeH\x03R\x0bstateSchema\x88\x01\x01\x12\x65\n\x19transform_with_state_info\x18\x0b \x01(\x0b\x32%.spark.connect.TransformWithStateInfoH\x04R\x16transformWithStateInfo\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_confB\x0f\n\r_state_schemaB\x1c\n\x1a_transform_with_state_info"\xdf\x01\n\x16TransformWithStateInfo\x12\x1b\n\ttime_mode\x18\x01 \x01(\tR\x08timeMode\x12\x38\n\x16\x65vent_time_column_name\x18\x02 \x01(\tH\x00R\x13\x65ventTimeColumnName\x88\x01\x01\x12\x41\n\routput_schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x01R\x0coutputSchema\x88\x01\x01\x42\x19\n\x17_event_time_column_nameB\x10\n\x0e_output_schema"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x97\x01\n!CommonInlineUserDefinedDataSource\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12O\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00R\x10pythonDataSourceB\r\n\x0b\x64\x61ta_source"K\n\x10PythonDataSource\x12\x18\n\x07\x63ommand\x18\x01 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x02 \x01(\tR\tpythonVer"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x84\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schema"\xdb\x03\n\x08\x41sOfJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12\x37\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08leftAsOf\x12\x39\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.ExpressionR\trightAsOf\x12\x36\n\tjoin_expr\x18\x05 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08joinExpr\x12#\n\rusing_columns\x18\x06 
\x03(\tR\x0cusingColumns\x12\x1b\n\tjoin_type\x18\x07 \x01(\tR\x08joinType\x12\x37\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\ttolerance\x12.\n\x13\x61llow_exact_matches\x18\t \x01(\x08R\x11\x61llowExactMatches\x12\x1c\n\tdirection\x18\n \x01(\tR\tdirection"\xe6\x01\n\x0bLateralJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinTypeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/ml_common.proto"\xcc\x1d\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d 
\x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! \x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x37\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00R\x08\x61sOfJoin\x12\x85\x01\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R!commonInlineUserDefinedDataSource\x12\x45\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00R\rwithRelations\x12\x38\n\ttranspose\x18* \x01(\x0b\x32\x18.spark.connect.TransposeH\x00R\ttranspose\x12w\n unresolved_table_valued_function\x18+ \x01(\x0b\x32,.spark.connect.UnresolvedTableValuedFunctionH\x00R\x1dunresolvedTableValuedFunction\x12?\n\x0clateral_join\x18, \x01(\x0b\x32\x1a.spark.connect.LateralJoinH\x00R\x0blateralJoin\x12.\n\x12referenced_plan_id\x18- \x01(\x03H\x00R\x10referencedPlanId\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12=\n\x0bml_relation\x18\xac\x02 \x01(\x0b\x32\x19.spark.connect.MlRelationH\x00R\nmlRelation\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\xe4\x03\n\nMlRelation\x12\x43\n\ttransform\x18\x01 \x01(\x0b\x32#.spark.connect.MlRelation.TransformH\x00R\ttransform\x12,\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00R\x05\x66\x65tch\x12P\n\x15model_summary_dataset\x18\x03 
\x01(\x0b\x32\x17.spark.connect.RelationH\x01R\x13modelSummaryDataset\x88\x01\x01\x1a\xeb\x01\n\tTransform\x12\x33\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00R\x06objRef\x12=\n\x0btransformer\x18\x02 \x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00R\x0btransformer\x12-\n\x05input\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsR\x06paramsB\n\n\x08operatorB\t\n\x07ml_typeB\x18\n\x16_model_summary_dataset"\xcb\x02\n\x05\x46\x65tch\x12\x31\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefR\x06objRef\x12\x35\n\x07methods\x18\x02 \x03(\x0b\x32\x1b.spark.connect.Fetch.MethodR\x07methods\x1a\xd7\x01\n\x06Method\x12\x16\n\x06method\x18\x01 \x01(\tR\x06method\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32 .spark.connect.Fetch.Method.ArgsR\x04\x61rgs\x1a\x7f\n\x04\x41rgs\x12\x39\n\x05param\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x05param\x12/\n\x05input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x05inputB\x0b\n\targs_type"\t\n\x07Unknown"\x8e\x01\n\x0eRelationCommon\x12#\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01R\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12-\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginR\x06originB\n\n\x08_plan_id"\xde\x03\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12O\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntryR\x0enamedArguments\x12>\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cposArguments\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"u\n\rWithRelations\x12+\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04root\x12\x37\n\nreferences\x18\x02 \x03(\x0b\x32\x17.spark.connect.RelationR\nreferences"\x97\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x95\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xfe\x05\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x12J\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSetsR\x0cgroupingSets\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1aL\n\x0cGroupingSets\x12<\n\x0cgrouping_set\x18\x01 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x0bgroupingSet"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 \x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xfe\x02\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12i\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01R\x10renameColumnsMap\x12\x42\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.RenameR\x07renames\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x45\n\x06Rename\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12 \n\x0cnew_col_name\x18\x02 \x01(\tR\nnewColName"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"z\n\tTranspose\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12>\n\rindex_columns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cindexColumns"}\n\x1dUnresolvedTableValuedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xe8\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x12"\n\nprofile_id\x18\x04 \x01(\x05H\x01R\tprofileId\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id"\xd2\x06\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x12?\n\x0cstate_schema\x18\n \x01(\x0b\x32\x17.spark.connect.DataTypeH\x03R\x0bstateSchema\x88\x01\x01\x12\x65\n\x19transform_with_state_info\x18\x0b 
\x01(\x0b\x32%.spark.connect.TransformWithStateInfoH\x04R\x16transformWithStateInfo\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_confB\x0f\n\r_state_schemaB\x1c\n\x1a_transform_with_state_info"\xdf\x01\n\x16TransformWithStateInfo\x12\x1b\n\ttime_mode\x18\x01 \x01(\tR\x08timeMode\x12\x38\n\x16\x65vent_time_column_name\x18\x02 \x01(\tH\x00R\x13\x65ventTimeColumnName\x88\x01\x01\x12\x41\n\routput_schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x01R\x0coutputSchema\x88\x01\x01\x42\x19\n\x17_event_time_column_nameB\x10\n\x0e_output_schema"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x97\x01\n!CommonInlineUserDefinedDataSource\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12O\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00R\x10pythonDataSourceB\r\n\x0b\x64\x61ta_source"K\n\x10PythonDataSource\x12\x18\n\x07\x63ommand\x18\x01 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x02 \x01(\tR\tpythonVer"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x84\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 
\x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schema"\xdb\x03\n\x08\x41sOfJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12\x37\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08leftAsOf\x12\x39\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.ExpressionR\trightAsOf\x12\x36\n\tjoin_expr\x18\x05 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08joinExpr\x12#\n\rusing_columns\x18\x06 \x03(\tR\x0cusingColumns\x12\x1b\n\tjoin_type\x18\x07 \x01(\tR\x08joinType\x12\x37\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\ttolerance\x12.\n\x13\x61llow_exact_matches\x18\t \x01(\x08R\x11\x61llowExactMatches\x12\x1c\n\tdirection\x18\n \x01(\tR\tdirection"\xe6\x01\n\x0bLateralJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinTypeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _globals = globals() @@ -79,171 +79,171 @@ _globals["_PARSE_OPTIONSENTRY"]._loaded_options = None _globals["_PARSE_OPTIONSENTRY"]._serialized_options = b"8\001" _globals["_RELATION"]._serialized_start = 224 - _globals["_RELATION"]._serialized_end = 3964 - _globals["_MLRELATION"]._serialized_start = 3967 - _globals["_MLRELATION"]._serialized_end = 4451 - _globals["_MLRELATION_TRANSFORM"]._serialized_start = 4179 - _globals["_MLRELATION_TRANSFORM"]._serialized_end = 4414 - _globals["_FETCH"]._serialized_start = 4454 - _globals["_FETCH"]._serialized_end = 4785 - _globals["_FETCH_METHOD"]._serialized_start = 4570 - _globals["_FETCH_METHOD"]._serialized_end = 4785 - _globals["_FETCH_METHOD_ARGS"]._serialized_start = 4658 - _globals["_FETCH_METHOD_ARGS"]._serialized_end = 4785 - _globals["_UNKNOWN"]._serialized_start = 4787 - _globals["_UNKNOWN"]._serialized_end = 4796 - _globals["_RELATIONCOMMON"]._serialized_start = 4799 - _globals["_RELATIONCOMMON"]._serialized_end = 4941 - _globals["_SQL"]._serialized_start = 4944 - _globals["_SQL"]._serialized_end = 5422 - _globals["_SQL_ARGSENTRY"]._serialized_start = 5238 - _globals["_SQL_ARGSENTRY"]._serialized_end = 5328 - _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_start = 5330 - _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_end = 5422 - _globals["_WITHRELATIONS"]._serialized_start = 5424 - _globals["_WITHRELATIONS"]._serialized_end = 5541 - _globals["_READ"]._serialized_start = 5544 - _globals["_READ"]._serialized_end = 6207 - _globals["_READ_NAMEDTABLE"]._serialized_start = 5722 - _globals["_READ_NAMEDTABLE"]._serialized_end = 5914 - _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_start = 5856 - _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_end = 5914 - _globals["_READ_DATASOURCE"]._serialized_start = 5917 - _globals["_READ_DATASOURCE"]._serialized_end = 6194 - _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_start = 5856 - _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_end = 5914 - _globals["_PROJECT"]._serialized_start = 6209 - _globals["_PROJECT"]._serialized_end = 6326 - _globals["_FILTER"]._serialized_start = 6328 - 
_globals["_FILTER"]._serialized_end = 6440 - _globals["_JOIN"]._serialized_start = 6443 - _globals["_JOIN"]._serialized_end = 7104 - _globals["_JOIN_JOINDATATYPE"]._serialized_start = 6782 - _globals["_JOIN_JOINDATATYPE"]._serialized_end = 6874 - _globals["_JOIN_JOINTYPE"]._serialized_start = 6877 - _globals["_JOIN_JOINTYPE"]._serialized_end = 7085 - _globals["_SETOPERATION"]._serialized_start = 7107 - _globals["_SETOPERATION"]._serialized_end = 7586 - _globals["_SETOPERATION_SETOPTYPE"]._serialized_start = 7423 - _globals["_SETOPERATION_SETOPTYPE"]._serialized_end = 7537 - _globals["_LIMIT"]._serialized_start = 7588 - _globals["_LIMIT"]._serialized_end = 7664 - _globals["_OFFSET"]._serialized_start = 7666 - _globals["_OFFSET"]._serialized_end = 7745 - _globals["_TAIL"]._serialized_start = 7747 - _globals["_TAIL"]._serialized_end = 7822 - _globals["_AGGREGATE"]._serialized_start = 7825 - _globals["_AGGREGATE"]._serialized_end = 8591 - _globals["_AGGREGATE_PIVOT"]._serialized_start = 8240 - _globals["_AGGREGATE_PIVOT"]._serialized_end = 8351 - _globals["_AGGREGATE_GROUPINGSETS"]._serialized_start = 8353 - _globals["_AGGREGATE_GROUPINGSETS"]._serialized_end = 8429 - _globals["_AGGREGATE_GROUPTYPE"]._serialized_start = 8432 - _globals["_AGGREGATE_GROUPTYPE"]._serialized_end = 8591 - _globals["_SORT"]._serialized_start = 8594 - _globals["_SORT"]._serialized_end = 8754 - _globals["_DROP"]._serialized_start = 8757 - _globals["_DROP"]._serialized_end = 8898 - _globals["_DEDUPLICATE"]._serialized_start = 8901 - _globals["_DEDUPLICATE"]._serialized_end = 9141 - _globals["_LOCALRELATION"]._serialized_start = 9143 - _globals["_LOCALRELATION"]._serialized_end = 9232 - _globals["_CACHEDLOCALRELATION"]._serialized_start = 9234 - _globals["_CACHEDLOCALRELATION"]._serialized_end = 9306 - _globals["_CACHEDREMOTERELATION"]._serialized_start = 9308 - _globals["_CACHEDREMOTERELATION"]._serialized_end = 9363 - _globals["_SAMPLE"]._serialized_start = 9366 - _globals["_SAMPLE"]._serialized_end = 9639 - _globals["_RANGE"]._serialized_start = 9642 - _globals["_RANGE"]._serialized_end = 9787 - _globals["_SUBQUERYALIAS"]._serialized_start = 9789 - _globals["_SUBQUERYALIAS"]._serialized_end = 9903 - _globals["_REPARTITION"]._serialized_start = 9906 - _globals["_REPARTITION"]._serialized_end = 10048 - _globals["_SHOWSTRING"]._serialized_start = 10051 - _globals["_SHOWSTRING"]._serialized_end = 10193 - _globals["_HTMLSTRING"]._serialized_start = 10195 - _globals["_HTMLSTRING"]._serialized_end = 10309 - _globals["_STATSUMMARY"]._serialized_start = 10311 - _globals["_STATSUMMARY"]._serialized_end = 10403 - _globals["_STATDESCRIBE"]._serialized_start = 10405 - _globals["_STATDESCRIBE"]._serialized_end = 10486 - _globals["_STATCROSSTAB"]._serialized_start = 10488 - _globals["_STATCROSSTAB"]._serialized_end = 10589 - _globals["_STATCOV"]._serialized_start = 10591 - _globals["_STATCOV"]._serialized_end = 10687 - _globals["_STATCORR"]._serialized_start = 10690 - _globals["_STATCORR"]._serialized_end = 10827 - _globals["_STATAPPROXQUANTILE"]._serialized_start = 10830 - _globals["_STATAPPROXQUANTILE"]._serialized_end = 10994 - _globals["_STATFREQITEMS"]._serialized_start = 10996 - _globals["_STATFREQITEMS"]._serialized_end = 11121 - _globals["_STATSAMPLEBY"]._serialized_start = 11124 - _globals["_STATSAMPLEBY"]._serialized_end = 11433 - _globals["_STATSAMPLEBY_FRACTION"]._serialized_start = 11325 - _globals["_STATSAMPLEBY_FRACTION"]._serialized_end = 11424 - _globals["_NAFILL"]._serialized_start = 11436 - 
_globals["_NAFILL"]._serialized_end = 11570 - _globals["_NADROP"]._serialized_start = 11573 - _globals["_NADROP"]._serialized_end = 11707 - _globals["_NAREPLACE"]._serialized_start = 11710 - _globals["_NAREPLACE"]._serialized_end = 12006 - _globals["_NAREPLACE_REPLACEMENT"]._serialized_start = 11865 - _globals["_NAREPLACE_REPLACEMENT"]._serialized_end = 12006 - _globals["_TODF"]._serialized_start = 12008 - _globals["_TODF"]._serialized_end = 12096 - _globals["_WITHCOLUMNSRENAMED"]._serialized_start = 12099 - _globals["_WITHCOLUMNSRENAMED"]._serialized_end = 12481 - _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_start = 12343 - _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_end = 12410 - _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_start = 12412 - _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_end = 12481 - _globals["_WITHCOLUMNS"]._serialized_start = 12483 - _globals["_WITHCOLUMNS"]._serialized_end = 12602 - _globals["_WITHWATERMARK"]._serialized_start = 12605 - _globals["_WITHWATERMARK"]._serialized_end = 12739 - _globals["_HINT"]._serialized_start = 12742 - _globals["_HINT"]._serialized_end = 12874 - _globals["_UNPIVOT"]._serialized_start = 12877 - _globals["_UNPIVOT"]._serialized_end = 13204 - _globals["_UNPIVOT_VALUES"]._serialized_start = 13134 - _globals["_UNPIVOT_VALUES"]._serialized_end = 13193 - _globals["_TRANSPOSE"]._serialized_start = 13206 - _globals["_TRANSPOSE"]._serialized_end = 13328 - _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_start = 13330 - _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_end = 13455 - _globals["_TOSCHEMA"]._serialized_start = 13457 - _globals["_TOSCHEMA"]._serialized_end = 13563 - _globals["_REPARTITIONBYEXPRESSION"]._serialized_start = 13566 - _globals["_REPARTITIONBYEXPRESSION"]._serialized_end = 13769 - _globals["_MAPPARTITIONS"]._serialized_start = 13772 - _globals["_MAPPARTITIONS"]._serialized_end = 14004 - _globals["_GROUPMAP"]._serialized_start = 14007 - _globals["_GROUPMAP"]._serialized_end = 14857 - _globals["_TRANSFORMWITHSTATEINFO"]._serialized_start = 14860 - _globals["_TRANSFORMWITHSTATEINFO"]._serialized_end = 15083 - _globals["_COGROUPMAP"]._serialized_start = 15086 - _globals["_COGROUPMAP"]._serialized_end = 15612 - _globals["_APPLYINPANDASWITHSTATE"]._serialized_start = 15615 - _globals["_APPLYINPANDASWITHSTATE"]._serialized_end = 15972 - _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_start = 15975 - _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_end = 16219 - _globals["_PYTHONUDTF"]._serialized_start = 16222 - _globals["_PYTHONUDTF"]._serialized_end = 16399 - _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_start = 16402 - _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_end = 16553 - _globals["_PYTHONDATASOURCE"]._serialized_start = 16555 - _globals["_PYTHONDATASOURCE"]._serialized_end = 16630 - _globals["_COLLECTMETRICS"]._serialized_start = 16633 - _globals["_COLLECTMETRICS"]._serialized_end = 16769 - _globals["_PARSE"]._serialized_start = 16772 - _globals["_PARSE"]._serialized_end = 17160 - _globals["_PARSE_OPTIONSENTRY"]._serialized_start = 5856 - _globals["_PARSE_OPTIONSENTRY"]._serialized_end = 5914 - _globals["_PARSE_PARSEFORMAT"]._serialized_start = 17061 - _globals["_PARSE_PARSEFORMAT"]._serialized_end = 17149 - _globals["_ASOFJOIN"]._serialized_start = 17163 - _globals["_ASOFJOIN"]._serialized_end = 17638 - _globals["_LATERALJOIN"]._serialized_start = 17641 - _globals["_LATERALJOIN"]._serialized_end = 17871 + 
_globals["_RELATION"]._serialized_end = 4012 + _globals["_MLRELATION"]._serialized_start = 4015 + _globals["_MLRELATION"]._serialized_end = 4499 + _globals["_MLRELATION_TRANSFORM"]._serialized_start = 4227 + _globals["_MLRELATION_TRANSFORM"]._serialized_end = 4462 + _globals["_FETCH"]._serialized_start = 4502 + _globals["_FETCH"]._serialized_end = 4833 + _globals["_FETCH_METHOD"]._serialized_start = 4618 + _globals["_FETCH_METHOD"]._serialized_end = 4833 + _globals["_FETCH_METHOD_ARGS"]._serialized_start = 4706 + _globals["_FETCH_METHOD_ARGS"]._serialized_end = 4833 + _globals["_UNKNOWN"]._serialized_start = 4835 + _globals["_UNKNOWN"]._serialized_end = 4844 + _globals["_RELATIONCOMMON"]._serialized_start = 4847 + _globals["_RELATIONCOMMON"]._serialized_end = 4989 + _globals["_SQL"]._serialized_start = 4992 + _globals["_SQL"]._serialized_end = 5470 + _globals["_SQL_ARGSENTRY"]._serialized_start = 5286 + _globals["_SQL_ARGSENTRY"]._serialized_end = 5376 + _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_start = 5378 + _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_end = 5470 + _globals["_WITHRELATIONS"]._serialized_start = 5472 + _globals["_WITHRELATIONS"]._serialized_end = 5589 + _globals["_READ"]._serialized_start = 5592 + _globals["_READ"]._serialized_end = 6255 + _globals["_READ_NAMEDTABLE"]._serialized_start = 5770 + _globals["_READ_NAMEDTABLE"]._serialized_end = 5962 + _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_start = 5904 + _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_end = 5962 + _globals["_READ_DATASOURCE"]._serialized_start = 5965 + _globals["_READ_DATASOURCE"]._serialized_end = 6242 + _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_start = 5904 + _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_end = 5962 + _globals["_PROJECT"]._serialized_start = 6257 + _globals["_PROJECT"]._serialized_end = 6374 + _globals["_FILTER"]._serialized_start = 6376 + _globals["_FILTER"]._serialized_end = 6488 + _globals["_JOIN"]._serialized_start = 6491 + _globals["_JOIN"]._serialized_end = 7152 + _globals["_JOIN_JOINDATATYPE"]._serialized_start = 6830 + _globals["_JOIN_JOINDATATYPE"]._serialized_end = 6922 + _globals["_JOIN_JOINTYPE"]._serialized_start = 6925 + _globals["_JOIN_JOINTYPE"]._serialized_end = 7133 + _globals["_SETOPERATION"]._serialized_start = 7155 + _globals["_SETOPERATION"]._serialized_end = 7634 + _globals["_SETOPERATION_SETOPTYPE"]._serialized_start = 7471 + _globals["_SETOPERATION_SETOPTYPE"]._serialized_end = 7585 + _globals["_LIMIT"]._serialized_start = 7636 + _globals["_LIMIT"]._serialized_end = 7712 + _globals["_OFFSET"]._serialized_start = 7714 + _globals["_OFFSET"]._serialized_end = 7793 + _globals["_TAIL"]._serialized_start = 7795 + _globals["_TAIL"]._serialized_end = 7870 + _globals["_AGGREGATE"]._serialized_start = 7873 + _globals["_AGGREGATE"]._serialized_end = 8639 + _globals["_AGGREGATE_PIVOT"]._serialized_start = 8288 + _globals["_AGGREGATE_PIVOT"]._serialized_end = 8399 + _globals["_AGGREGATE_GROUPINGSETS"]._serialized_start = 8401 + _globals["_AGGREGATE_GROUPINGSETS"]._serialized_end = 8477 + _globals["_AGGREGATE_GROUPTYPE"]._serialized_start = 8480 + _globals["_AGGREGATE_GROUPTYPE"]._serialized_end = 8639 + _globals["_SORT"]._serialized_start = 8642 + _globals["_SORT"]._serialized_end = 8802 + _globals["_DROP"]._serialized_start = 8805 + _globals["_DROP"]._serialized_end = 8946 + _globals["_DEDUPLICATE"]._serialized_start = 8949 + _globals["_DEDUPLICATE"]._serialized_end = 9189 + _globals["_LOCALRELATION"]._serialized_start = 9191 + 
_globals["_LOCALRELATION"]._serialized_end = 9280 + _globals["_CACHEDLOCALRELATION"]._serialized_start = 9282 + _globals["_CACHEDLOCALRELATION"]._serialized_end = 9354 + _globals["_CACHEDREMOTERELATION"]._serialized_start = 9356 + _globals["_CACHEDREMOTERELATION"]._serialized_end = 9411 + _globals["_SAMPLE"]._serialized_start = 9414 + _globals["_SAMPLE"]._serialized_end = 9687 + _globals["_RANGE"]._serialized_start = 9690 + _globals["_RANGE"]._serialized_end = 9835 + _globals["_SUBQUERYALIAS"]._serialized_start = 9837 + _globals["_SUBQUERYALIAS"]._serialized_end = 9951 + _globals["_REPARTITION"]._serialized_start = 9954 + _globals["_REPARTITION"]._serialized_end = 10096 + _globals["_SHOWSTRING"]._serialized_start = 10099 + _globals["_SHOWSTRING"]._serialized_end = 10241 + _globals["_HTMLSTRING"]._serialized_start = 10243 + _globals["_HTMLSTRING"]._serialized_end = 10357 + _globals["_STATSUMMARY"]._serialized_start = 10359 + _globals["_STATSUMMARY"]._serialized_end = 10451 + _globals["_STATDESCRIBE"]._serialized_start = 10453 + _globals["_STATDESCRIBE"]._serialized_end = 10534 + _globals["_STATCROSSTAB"]._serialized_start = 10536 + _globals["_STATCROSSTAB"]._serialized_end = 10637 + _globals["_STATCOV"]._serialized_start = 10639 + _globals["_STATCOV"]._serialized_end = 10735 + _globals["_STATCORR"]._serialized_start = 10738 + _globals["_STATCORR"]._serialized_end = 10875 + _globals["_STATAPPROXQUANTILE"]._serialized_start = 10878 + _globals["_STATAPPROXQUANTILE"]._serialized_end = 11042 + _globals["_STATFREQITEMS"]._serialized_start = 11044 + _globals["_STATFREQITEMS"]._serialized_end = 11169 + _globals["_STATSAMPLEBY"]._serialized_start = 11172 + _globals["_STATSAMPLEBY"]._serialized_end = 11481 + _globals["_STATSAMPLEBY_FRACTION"]._serialized_start = 11373 + _globals["_STATSAMPLEBY_FRACTION"]._serialized_end = 11472 + _globals["_NAFILL"]._serialized_start = 11484 + _globals["_NAFILL"]._serialized_end = 11618 + _globals["_NADROP"]._serialized_start = 11621 + _globals["_NADROP"]._serialized_end = 11755 + _globals["_NAREPLACE"]._serialized_start = 11758 + _globals["_NAREPLACE"]._serialized_end = 12054 + _globals["_NAREPLACE_REPLACEMENT"]._serialized_start = 11913 + _globals["_NAREPLACE_REPLACEMENT"]._serialized_end = 12054 + _globals["_TODF"]._serialized_start = 12056 + _globals["_TODF"]._serialized_end = 12144 + _globals["_WITHCOLUMNSRENAMED"]._serialized_start = 12147 + _globals["_WITHCOLUMNSRENAMED"]._serialized_end = 12529 + _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_start = 12391 + _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_end = 12458 + _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_start = 12460 + _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_end = 12529 + _globals["_WITHCOLUMNS"]._serialized_start = 12531 + _globals["_WITHCOLUMNS"]._serialized_end = 12650 + _globals["_WITHWATERMARK"]._serialized_start = 12653 + _globals["_WITHWATERMARK"]._serialized_end = 12787 + _globals["_HINT"]._serialized_start = 12790 + _globals["_HINT"]._serialized_end = 12922 + _globals["_UNPIVOT"]._serialized_start = 12925 + _globals["_UNPIVOT"]._serialized_end = 13252 + _globals["_UNPIVOT_VALUES"]._serialized_start = 13182 + _globals["_UNPIVOT_VALUES"]._serialized_end = 13241 + _globals["_TRANSPOSE"]._serialized_start = 13254 + _globals["_TRANSPOSE"]._serialized_end = 13376 + _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_start = 13378 + _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_end = 13503 + _globals["_TOSCHEMA"]._serialized_start = 
13505 + _globals["_TOSCHEMA"]._serialized_end = 13611 + _globals["_REPARTITIONBYEXPRESSION"]._serialized_start = 13614 + _globals["_REPARTITIONBYEXPRESSION"]._serialized_end = 13817 + _globals["_MAPPARTITIONS"]._serialized_start = 13820 + _globals["_MAPPARTITIONS"]._serialized_end = 14052 + _globals["_GROUPMAP"]._serialized_start = 14055 + _globals["_GROUPMAP"]._serialized_end = 14905 + _globals["_TRANSFORMWITHSTATEINFO"]._serialized_start = 14908 + _globals["_TRANSFORMWITHSTATEINFO"]._serialized_end = 15131 + _globals["_COGROUPMAP"]._serialized_start = 15134 + _globals["_COGROUPMAP"]._serialized_end = 15660 + _globals["_APPLYINPANDASWITHSTATE"]._serialized_start = 15663 + _globals["_APPLYINPANDASWITHSTATE"]._serialized_end = 16020 + _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_start = 16023 + _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_end = 16267 + _globals["_PYTHONUDTF"]._serialized_start = 16270 + _globals["_PYTHONUDTF"]._serialized_end = 16447 + _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_start = 16450 + _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_end = 16601 + _globals["_PYTHONDATASOURCE"]._serialized_start = 16603 + _globals["_PYTHONDATASOURCE"]._serialized_end = 16678 + _globals["_COLLECTMETRICS"]._serialized_start = 16681 + _globals["_COLLECTMETRICS"]._serialized_end = 16817 + _globals["_PARSE"]._serialized_start = 16820 + _globals["_PARSE"]._serialized_end = 17208 + _globals["_PARSE_OPTIONSENTRY"]._serialized_start = 5904 + _globals["_PARSE_OPTIONSENTRY"]._serialized_end = 5962 + _globals["_PARSE_PARSEFORMAT"]._serialized_start = 17109 + _globals["_PARSE_PARSEFORMAT"]._serialized_end = 17197 + _globals["_ASOFJOIN"]._serialized_start = 17211 + _globals["_ASOFJOIN"]._serialized_end = 17686 + _globals["_LATERALJOIN"]._serialized_start = 17689 + _globals["_LATERALJOIN"]._serialized_end = 17919 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi index e1eb7945c19f0..d1c57f6dc38e4 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.pyi +++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi @@ -108,6 +108,7 @@ class Relation(google.protobuf.message.Message): TRANSPOSE_FIELD_NUMBER: builtins.int UNRESOLVED_TABLE_VALUED_FUNCTION_FIELD_NUMBER: builtins.int LATERAL_JOIN_FIELD_NUMBER: builtins.int + REFERENCED_PLAN_ID_FIELD_NUMBER: builtins.int FILL_NA_FIELD_NUMBER: builtins.int DROP_NA_FIELD_NUMBER: builtins.int REPLACE_FIELD_NUMBER: builtins.int @@ -215,6 +216,14 @@ class Relation(google.protobuf.message.Message): def unresolved_table_valued_function(self) -> global___UnresolvedTableValuedFunction: ... @property def lateral_join(self) -> global___LateralJoin: ... + referenced_plan_id: builtins.int + """Reference to a node else where in the tree. There are two use cases for this: + 1. Reduce tree duplication. In this case the tree contains two or more subtrees that are + identical. The referenced plan can only be a back reference, to a subtree that was + already visited by the planner. The planner is expected to visit the tree bottom-up from + left to right. + 1. Reduce tree depth. 
+ """ @property def fill_na(self) -> global___NAFill: """NA functions""" @@ -301,6 +310,7 @@ class Relation(google.protobuf.message.Message): transpose: global___Transpose | None = ..., unresolved_table_valued_function: global___UnresolvedTableValuedFunction | None = ..., lateral_join: global___LateralJoin | None = ..., + referenced_plan_id: builtins.int = ..., fill_na: global___NAFill | None = ..., drop_na: global___NADrop | None = ..., replace: global___NAReplace | None = ..., @@ -394,6 +404,8 @@ class Relation(google.protobuf.message.Message): b"range", "read", b"read", + "referenced_plan_id", + b"referenced_plan_id", "rel_type", b"rel_type", "repartition", @@ -519,6 +531,8 @@ class Relation(google.protobuf.message.Message): b"range", "read", b"read", + "referenced_plan_id", + b"referenced_plan_id", "rel_type", b"rel_type", "repartition", @@ -614,6 +628,7 @@ class Relation(google.protobuf.message.Message): "transpose", "unresolved_table_valued_function", "lateral_join", + "referenced_plan_id", "fill_na", "drop_na", "replace", diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala index 03e8d011b8d5f..57fc98046f295 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala @@ -25,6 +25,7 @@ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.{DurationInt, FiniteDuration} import scala.jdk.CollectionConverters._ +import scala.util.Random import org.apache.commons.io.FileUtils import org.apache.commons.io.output.TeeOutputStream @@ -1671,6 +1672,26 @@ class ClientE2ETestSuite checkAnswer(df, (0 until 6).map(i => Row(i))) } + test("Execute optimized plan - 33 duplicate local relations") { + val implicits = spark.implicits + import implicits._ + val rng = new Random(61209389765L) + val data = IndexedSeq.tabulate(128) { id => + id -> rng.nextBytes(1024) + } + val input = data.toDF("key", "value") + val unions = Iterator.range(0, 5).foldLeft(input) { + case (current, _) => current.union(current) + } + val df = unions.filter($"key".isin(input.select($"key").filter($"key" < 5))) + .groupBy($"key", $"value") + .count() + val compressionRatio = + df.optimizedPlan.getSerializedSize.toDouble / df.plan.getSerializedSize.toDouble + assert(compressionRatio < (1.0d / 32.0d)) // It should be very close to a 1/33 ratio. + checkAnswer(df, data.take(5).map(kv => Row(kv._1, kv._2, 32L))) + } + test("SPARK-52770: Support Time type") { val df = spark.sql("SELECT TIME '12:13:14'") diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/PlanOptimizerSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/PlanOptimizerSuite.scala new file mode 100644 index 0000000000000..069c677b016d6 --- /dev/null +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/PlanOptimizerSuite.scala @@ -0,0 +1,469 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect.client + +import java.util.TimeZone +import java.util.concurrent.atomic.AtomicLong + +import scala.collection.mutable +import scala.util.Random + +import com.google.protobuf.{Any => PAny} +import io.grpc.inprocess.InProcessChannelBuilder +import org.apache.arrow.memory.RootAllocator +import org.apache.commons.lang3.mutable.MutableInt +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.connect.proto +import org.apache.spark.sql.{Column, Encoder, Encoders} +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.agnosticEncoderFor +import org.apache.spark.sql.connect.{ColumnNodeToProtoConverter, DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.connect.client.arrow.ArrowSerializer +import org.apache.spark.sql.connect.test.ConnectFunSuite +import org.apache.spark.sql.functions.{col, max, min} +import org.apache.spark.sql.streaming.{GroupState, GroupStateTimeout} +import org.apache.spark.sql.types.StructType + +/** + * Test suite for the [[PlanOptimizer]]. + */ +class PlanOptimizerSuite extends ConnectFunSuite with BeforeAndAfterEach { + import PlanOptimizer.PlanId + + private implicit val longEncoder: Encoder[Long] = Encoders.scalaLong + + private implicit val longLongTupleEncoder: Encoder[(Long, Long)] = + Encoders.tuple(longEncoder, longEncoder) + + private var spark: SparkSession = _ + + private def newSparkSession(): SparkSession = { + val client = SparkConnectClient( + InProcessChannelBuilder.forName(getClass.getName).directExecutor().build()) + val session = new SparkSession(client, planIdGenerator = new AtomicLong) + session.releaseSessionOnClose = false + session + } + + override def beforeEach(): Unit = { + super.beforeEach() + spark = newSparkSession() + } + + override def afterEach(): Unit = { + try { + if (spark != null) { + spark.close() + spark = null + } + } finally { + super.afterEach() + } + } + + private case class PlanStats( + numRelations: Int, + planIds: Set[Long], + numDuplicatePlanIds: Int, + numDuplicateRelations: Int = 0) + + private def collectPlanStats(plan: proto.Plan): PlanStats = { + assert(plan.hasRoot) + var numRelations = 0 + val plansIdCounts = mutable.Map.empty[Long, MutableInt] + RelationTreeUtils.visit(plan.getRoot) { relation => + PlanId.get(relation).foreach { id => + plansIdCounts.getOrElseUpdate(id, new MutableInt(0)).incrementAndGet() + } + numRelations += 1 + true + } + PlanStats( + numRelations, + plansIdCounts.keySet.toSet, + plansIdCounts.map(_._2.intValue()).count(_ > 1), + plansIdCounts.map(_._2.intValue()).filter(_ > 1).sum + ) + } + + private def checkNoDeduplication(df: Dataset[_]): Unit = { + val plan = df.plan + val optimizedPlan = df.optimizedPlan + assert(plan eq optimizedPlan) + val planStats = collectPlanStats(plan) + assert(planStats.numDuplicatePlanIds == 0) + assert(planStats.numDuplicateRelations == 0) + } + + private def checkDeduplication( + df: Dataset[_], + numRelationsReduction: Int, + sizeReduction: Long): Unit = { + val plan = df.plan + val optimizedPlan = df.optimizedPlan + assert(plan != optimizedPlan) + + val planStats = 
collectPlanStats(plan) + assert(planStats.numDuplicatePlanIds > 0) + assert(planStats.numDuplicateRelations > 0) + + // An optimized plan should contain all the plan ids of the original plan. + val optimizedPlanStats = collectPlanStats(optimizedPlan) + assert(optimizedPlan.getRoot.hasWithRelations) + assert(planStats.planIds.equals(optimizedPlanStats.planIds - PlanId(optimizedPlan.getRoot))) + + // Idempotency. Once optimized there should not be any optimization opportunity left. + assert(optimizedPlanStats.numDuplicatePlanIds == 0) + assert(optimizedPlanStats.numDuplicateRelations == 0) + assert(PlanOptimizer.optimize(optimizedPlan, () => 0L) eq optimizedPlan) + + // Relations reduction. + assert(planStats.numRelations == optimizedPlanStats.numRelations + numRelationsReduction) + + // Size reduction. + val actualSizeReduction = plan.getSerializedSize - optimizedPlan.getSerializedSize + assert(actualSizeReduction == sizeReduction, + s"Actual reduction in plan size does not match expected reduction in plan: " + + s"$actualSizeReduction != $sizeReduction") + } + + test("un-optimizable plan remains unchanged - leafs") { + checkNoDeduplication(spark.range(10)) + // checkNoDeduplication(spark.sql("select 1")) + checkNoDeduplication(spark.emptyDataset[(Long, Long)]) + checkNoDeduplication(spark.read.format("parquet").load("s3://my-bucket/my-dir")) + checkNoDeduplication(spark.newDataFrame(_ => ())) + checkNoDeduplication(spark.newDataFrame(_.getUnknownBuilder)) + checkNoDeduplication(spark.newDataFrame(_.getCachedLocalRelationBuilder.setHash("1234"))) + checkNoDeduplication(spark.newDataFrame(_.getCachedRemoteRelationBuilder.setRelationId("rel1"))) + checkNoDeduplication(spark.newDataFrame(_.setReferencedPlanId(1))) + checkNoDeduplication(spark.newDataFrame(_.setExtension(PAny.pack(spark.range(10).plan)))) + checkNoDeduplication(spark.newDataFrame { + _.getCommonInlineUserDefinedDataSourceBuilder.setName("noop") + }) + checkNoDeduplication(spark.newDataFrame { + _.getCommonInlineUserDefinedTableFunctionBuilder.setFunctionName("noop") + }) + checkNoDeduplication(spark.newDataFrame { + _.getUnresolvedTableValuedFunctionBuilder.setFunctionName("noop") + }) + checkNoDeduplication(spark.newDataFrame { + _.getCatalogBuilder.getListCatalogsBuilder.setPattern("tbl*") + }) + } + + test("un-optimizable plan remains unchanged - unary") { + val input = spark.range(10) + val id = col("id") + checkNoDeduplication(input.select((id + 1).as("plus1"))) + checkNoDeduplication(input.filter(id > 1)) + checkNoDeduplication(input.sort(id.desc)) + checkNoDeduplication(input.limit(2)) + checkNoDeduplication(input.groupBy(id).count()) + checkNoDeduplication(input.sample(0.5)) + checkNoDeduplication(input.offset(3)) + checkNoDeduplication(input.dropDuplicates().as("q")) + checkNoDeduplication(input.repartition(3)) + checkNoDeduplication(input.repartition(id)) + checkNoDeduplication(input.toDF("id")) + checkNoDeduplication(input.withColumnRenamed("id", "di")) + checkNoDeduplication(input.drop("id")) + checkNoDeduplication(input.withColumn("id_plus1", id + 1)) + checkNoDeduplication(input.hint("broadcast")) + checkNoDeduplication(input.to(new StructType().add("id", "string"))) + checkNoDeduplication(input.mapPartitions(_.map(_.toLong))) + checkNoDeduplication(input.select(id, (id / 2).as("d2"), (id * 2).as("m2")) + .unpivot(Array(id), Array(col("d2"), col("m2")), "var", "val")) + checkNoDeduplication(input.withColumn("grp", id % 2).transpose(col("grp"))) + checkNoDeduplication(input.observe("simple", min(id), max(id))) + 
checkNoDeduplication(spark.read.csv(input.map(i => s"$i,$i")(Encoders.STRING))) + checkNoDeduplication(input.withWatermark("id", "1 minute")) + checkNoDeduplication(input.describe("id")) + checkNoDeduplication(input.summary("max", "min")) + checkNoDeduplication(input.withColumn("b", id).stat.crosstab("id", "b")) + checkNoDeduplication(input.stat.freqItems(Array("id"))) + checkNoDeduplication(input.stat.sampleBy(id, Map(0L -> 0.03, 1L -> 0.02), 33L)) + checkNoDeduplication(input.na.drop()) + checkNoDeduplication(input.na.fill(true)) + checkNoDeduplication(input.na.replace("id", Map(0L -> 1L))) + + // Manual ones... + checkNoDeduplication(spark.newDataFrame { + _.getShowStringBuilder.setInput(input.plan.getRoot) + .setNumRows(10) + .setTruncate(20) + .setVertical(false) + }) + checkNoDeduplication(spark.newDataFrame { + _.getHtmlStringBuilder.setInput(input.plan.getRoot).setNumRows(10).setTruncate(20) + }) + checkNoDeduplication(spark.newDataFrame { + _.getTailBuilder.setInput(input.plan.getRoot).setLimit(4) + }) + checkNoDeduplication(spark.newDataFrame { + _.getCovBuilder.setInput(input.plan.getRoot).setCol1("a").setCol2("b") + }) + checkNoDeduplication(spark.newDataFrame { + _.getCorrBuilder.setInput(input.plan.getRoot).setCol1("a").setCol2("b") + }) + checkNoDeduplication(spark.newDataFrame { + _.getApplyInPandasWithStateBuilder.setInput(input.plan.getRoot) + .addGroupingExpressions(toExpr(id)) + }) + checkNoDeduplication(spark.newDataFrame { + _.getApproxQuantileBuilder.setInput(input.plan.getRoot) + .addCols("id") + .addProbabilities(0.1).addProbabilities(0.2) + .setRelativeError(0.01) + }) + checkNoDeduplication(spark.newDataFrame { builder => + val transform = builder.getMlRelationBuilder.getTransformBuilder + .setInput(input.plan.getRoot) + transform.getTransformerBuilder + .setName("oneHotEncoder") + .setType(proto.MlOperator.OperatorType.OPERATOR_TYPE_TRANSFORMER) + }) + } + + private def testBinaryOperationDeduplication( + name: String, + sizeReduction1: Int, + sizeReduction2: Int)( + f: ((DataFrame, Column), (DataFrame, Column)) => Dataset[_]): Unit = { + test("optimize plan with duplicated relations - " + name) { + val left = spark.range(10).as("a").toDF() + val right = spark.range(11).as("b").toDF() + // No deduplication. 
+ val df1 = f((left, left("id")), (right, right("id"))) + checkNoDeduplication(df1) + // Deduplication + val df2 = f((left, left("id")), (left, left("id"))) + checkDeduplication(df2, numRelationsReduction = -1, sizeReduction = sizeReduction1) + // Deeper tree + val df3 = f((df2.toDF(), df2("id")), (left, left("id"))) + checkDeduplication(df3, numRelationsReduction = 0, sizeReduction = sizeReduction2) + } + } + + testBinaryOperationDeduplication("join", 5, 26) { + case ((left, leftKey), (right, rightKey)) => + left.join(right, leftKey === rightKey) + } + + testBinaryOperationDeduplication("lateralJoin", 5, 26) { + case ((left, leftKey), (right, rightKey)) => + left.lateralJoin(right, leftKey === rightKey) + } + + testBinaryOperationDeduplication("union", 7, 28) { + case ((left, _), (right, _)) => + left.union(right) + } + + testBinaryOperationDeduplication("intersect", 7, 28) { + case ((left, _), (right, _)) => + left.intersect(right) + } + + testBinaryOperationDeduplication("except", 7, 28) { + case ((left, _), (right, _)) => + left.except(right) + } + + testBinaryOperationDeduplication("subquery - exists", 5, 26) { + case ((left, leftKey), (right, rightKey)) => + left.filter(right.filter(rightKey === leftKey).exists()) + } + + testBinaryOperationDeduplication("subquery - scalar", 5, 26) { + case ((left, _), (right, rightKey)) => + left.select(right.agg(min(rightKey)).scalar()) + } + + testBinaryOperationDeduplication("subquery - in", 5, 26) { + case ((left, leftKey), (right, _)) => + left.filter(!leftKey.isin(right)) + } + + testBinaryOperationDeduplication("groupMap", 5, 24) { + case ((left, leftKey), (right, rightKey)) => + val initialState = right.groupBy(rightKey).as[Long, Long] + left.groupBy(leftKey).as[Long, Long] + .mapGroupsWithState(GroupStateTimeout.EventTimeTimeout(), initialState) { + (key: Long, values: Iterator[Long], state: GroupState[Long]) => + (key, values.sum + state.get) + } + } + + testBinaryOperationDeduplication("coGroup", 5, 26) { + case ((left, leftKey), (right, rightKey)) => + val leftKv = left.groupBy(leftKey).as[Long, Long] + val rightKv = right.groupBy(rightKey).as[Long, Long] + leftKv.cogroup(rightKv) { + (key: Long, leftValues: Iterator[Long], rightValues: Iterator[Long]) => + leftValues.zipAll(rightValues, 0L, 0L).map { lr => + (key, lr._1 + lr._2) + } + } + } + + test("optimize plan with duplicated relations - asOfJoin") { + val input = spark.range(10).as("x") + val id = ColumnNodeToProtoConverter.toExpr(input("id")) + val relation = input.plan.getRoot + val df = spark.newDataFrame { builder => + builder.getAsOfJoinBuilder + .setLeft(relation) + .setLeftAsOf(id) + .setRight(relation) + .setRightAsOf(id) + .setDirection("backward") + .setAllowExactMatches(true) + } + checkDeduplication(df, numRelationsReduction = -1, sizeReduction = 5) + } + + test("optimize plan with duplicated relations - MLRelation - fetch") { + val input = spark.range(10).as("x") + val other = spark.read.format("parquet").load() + val lit = proto.Expression.Literal.newBuilder().setLong(11L).build() + val df = spark.newDataFrame { builder => + val fetch = builder.getMlRelationBuilder.getFetchBuilder + fetch.getObjRefBuilder.setId("21345") + fetch.addMethodsBuilder().setMethod("discombobulate") + .addArgs(proto.Fetch.Method.Args.newBuilder().setParam(lit)) + .addArgs(proto.Fetch.Method.Args.newBuilder().setInput(input.plan.getRoot)) + fetch.addMethodsBuilder().setMethod("fluster") + .addArgs(proto.Fetch.Method.Args.newBuilder().setInput(other.plan.getRoot)) + 
.addArgs(proto.Fetch.Method.Args.newBuilder().setInput(input.plan.getRoot)) + } + checkDeduplication(df, numRelationsReduction = -1, sizeReduction = 8) + } + + test("optimize plan with duplicated relations - subquery WithRelations rewrite") { + val input1 = spark.range(10) + val input2 = spark.emptyDataset[Long] + val input3 = spark.range(1, 1, 1).as("ref") + val df = input1.union(input3).filter( + col("id").isin(input1) && + col("id").isin(input2) && + col("id").isin(input3)) + checkDeduplication(df, numRelationsReduction = -1, sizeReduction = 16) + + // Check if the original WithRelations node is retained and has the proper references. + val root = df.optimizedPlan.getRoot + RelationTreeUtils.visit(root) { relation => + if (relation.hasWithRelations && (relation ne root)) { + val withRelations = relation.getWithRelations + assert(PlanId(df.plan.getRoot) == PlanId(relation)) + assert(withRelations.getReferencesCount == 2) + assert(withRelations.getReferences(0) eq input2.plan.getRoot) + assert(withRelations.getReferences(1).hasReferencedPlanId) + assert(withRelations.getReferences(1).getReferencedPlanId == PlanId(input3.plan.getRoot)) + } + true + } + } + + test("optimize plan with duplicated relations - 64 duplicate local relations") { + // Manually build a local relation. + val input = spark.newDataFrame { builder => + val schema = new StructType() + .add("key", "long") + .add("value", "binary") + val rng = new Random(61209389765L) + val allocator = new RootAllocator() + val byteString = try { + ArrowSerializer.serialize( + Iterator.tabulate(128) { i => + i.toLong -> rng.nextBytes(1024) + }, + agnosticEncoderFor(Encoders.tuple(longEncoder, Encoders.BINARY)), + allocator, + timeZoneId = TimeZone.getDefault.toString, + largeVarTypes = false) + } finally { + allocator.close() + } + builder.getLocalRelationBuilder.setSchema(schema.json).setData(byteString) + } + + // Build a tree with massive duplication. It will contain 64 duplicate local relations. + val df = Iterator.range(0, 6).foldLeft(input) { + case (current, _) => current.union(current) + } + // Optimization reduces size by 98.4% + checkDeduplication(df, numRelationsReduction = 107, sizeReduction = 8396068) + } + + private def join(input: Dataset[_], numJoins: Int): DataFrame = { + Iterator.fill(numJoins + 1)(input.toDF()).reduce(_.join(_)) + } + + test("optimize can increase number of relations") { + // Optimize can increase the number of relations in a plan. This happens when the number of + // relations removed does not offset the addition of the references and the WithRelations node. + + // A single relation duplicated subtree. Optimization always adds two relations + val input = spark.range(10) + checkDeduplication(join(input, 1), numRelationsReduction = -2, sizeReduction = -5) + checkDeduplication(join(input, 2), numRelationsReduction = -2, sizeReduction = 4) + checkDeduplication(join(input, 3), numRelationsReduction = -2, sizeReduction = 13) + checkDeduplication(join(input, 4), numRelationsReduction = -2, sizeReduction = 22) + checkDeduplication(join(input, 5), numRelationsReduction = -2, sizeReduction = 32) + + // A 2-relation duplicated subtree. Optimization only adds a relation if there is a single + // relation with 2 duplicates in the tree. 
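+    // In general, deduplicating a single subtree of S relations that occurs k times keeps one copy
+    // of the subtree, adds k references and one WithRelations root, so the relation count changes
+    // by k * S - (S + k + 1); this only becomes a net reduction once (k - 1) * (S - 1) > 2, which
+    // is what the expectations below illustrate.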
+ val input2 = input.as("a") + checkDeduplication(join(input2, 1), numRelationsReduction = -1, sizeReduction = 7) + checkDeduplication(join(input2, 2), numRelationsReduction = 0, sizeReduction = 28) + checkDeduplication(join(input2, 3), numRelationsReduction = 1, sizeReduction = 50) + checkDeduplication(join(input2, 4), numRelationsReduction = 2, sizeReduction = 73) + checkDeduplication(join(input2, 5), numRelationsReduction = 3, sizeReduction = 96) + + // A 3-relation duplicated subtree. Optimization always reduces the number of relations. + val input3 = input2.select(col("id")) + checkDeduplication(join(input3, 1), numRelationsReduction = 0, sizeReduction = 209) + checkDeduplication(join(input3, 2), numRelationsReduction = 2, sizeReduction = 434) + checkDeduplication(join(input3, 3), numRelationsReduction = 4, sizeReduction = 659) + } + + test("optimize can increase the size of the plan") { + // Optimize can increase the size of a plan. This happens when the size of the references and + // the withRelations node is larger than the sum of the deduplicated relations. + + // An unknown relation is tiny (7-16 bytes depending on planId). That is smaller than the + // WithRelations node (6-19 bytes depending on the withRelations planId, and the size of root + // plan) and the two references (per reference 3-13 bytes depending on planId) added by the + // optimization; as a result the size increases. + val input1 = spark.newDataFrame { builder => + builder.getUnknownBuilder + } + + // A single relation duplicated subtree. Optimization initially increases size. + checkDeduplication(join(input1, 1), numRelationsReduction = -2, sizeReduction = -10) + checkDeduplication(join(input1, 2), numRelationsReduction = -2, sizeReduction = -6) + checkDeduplication(join(input1, 3), numRelationsReduction = -2, sizeReduction = -2) + checkDeduplication(join(input1, 4), numRelationsReduction = -2, sizeReduction = 2) + + // A 2-relation duplicated subtree. Optimization always reduces size. + val input2 = input1.as("a") + checkDeduplication(join(input2, 1), numRelationsReduction = -1, sizeReduction = 2) + checkDeduplication(join(input2, 2), numRelationsReduction = 0, sizeReduction = 18) + checkDeduplication(join(input2, 3), numRelationsReduction = 1, sizeReduction = 34) + } +} diff --git a/sql/connect/common/src/main/protobuf/spark/connect/relations.proto b/sql/connect/common/src/main/protobuf/spark/connect/relations.proto index ccb674e812dc0..0d6cec38fcce8 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/relations.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/relations.proto @@ -81,6 +81,14 @@ message Relation { UnresolvedTableValuedFunction unresolved_table_valued_function = 43; LateralJoin lateral_join = 44; + // Reference to a node elsewhere in the tree. There are two use cases for this: + // 1. Reduce tree duplication. In this case the tree contains two or more subtrees that are + // identical. The referenced plan can only be a back reference to a subtree that was + // already visited by the planner. The planner is expected to visit the tree bottom-up from + // left to right. + // 2. Reduce tree depth.
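+    // In the deduplication case, for example, a plan that contains the same subtree twice can keep
+    // a single copy of the subtree in a WithRelations node and replace the other occurrence with a
+    // relation whose referenced_plan_id is set to the plan_id of the kept copy.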
+ int64 referenced_plan_id = 45; + // NA functions NAFill fill_na = 90; NADrop drop_na = 91; diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriter.scala index 2038037d4439c..5964459d146ee 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriter.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriter.scala @@ -96,7 +96,7 @@ final class DataFrameWriter[T] private[sql] (ds: Dataset[T]) extends sql.DataFra private def executeWriteOperation(f: proto.WriteOperation.Builder => Unit): Unit = { val builder = proto.WriteOperation.newBuilder() - builder.setInput(ds.plan.getRoot) + builder.setInput(ds.optimizedPlan.getRoot) // Set path or table f(builder) diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriterV2.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriterV2.scala index 06d339487bfb8..1d3ce623c820f 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriterV2.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataFrameWriterV2.scala @@ -37,7 +37,7 @@ final class DataFrameWriterV2[T] private[sql] (table: String, ds: Dataset[T]) private val builder = proto.WriteOperationV2 .newBuilder() - .setInput(ds.plan.getRoot) + .setInput(ds.optimizedPlan.getRoot) .setTableName(table) /** @inheritdoc */ diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamWriter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamWriter.scala index a42a463e2c42a..e6bc9ca374051 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamWriter.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/DataStreamWriter.scala @@ -195,5 +195,5 @@ final class DataStreamWriter[T] private[sql] (ds: Dataset[T]) private val sinkBuilder = WriteStreamOperationStart .newBuilder() - .setInput(ds.plan.getRoot) + .setInput(ds.optimizedPlan.getRoot) } diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala index ec169ba114a3d..8ce08f5d81c20 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/Dataset.scala @@ -147,6 +147,8 @@ class Dataset[T] private[sql] ( private[sql] val agnosticEncoder: AgnosticEncoder[T] = agnosticEncoderFor(encoder) + private[sql] lazy val optimizedPlan = sparkSession.optimizer.optimize(plan) + override def toString: String = { try { val builder = new mutable.StringBuilder @@ -211,7 +213,7 @@ class Dataset[T] private[sql] ( DataTypeProtoConverter .toCatalystType( sparkSession - .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.SCHEMA) + .analyze(optimizedPlan, proto.AnalyzePlanRequest.AnalyzeCase.SCHEMA) .getSchema .getSchema) .asInstanceOf[StructType] @@ -234,7 +236,7 @@ class Dataset[T] private[sql] ( // scalastyle:off println println( sparkSession - .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.EXPLAIN, Some(mode)) + .analyze(optimizedPlan, proto.AnalyzePlanRequest.AnalyzeCase.EXPLAIN, Some(mode)) .getExplain .getExplainString) // scalastyle:on println @@ -242,7 +244,7 @@ class Dataset[T] private[sql] ( /** @inheritdoc */ def isLocal: Boolean = sparkSession - .analyze(plan, 
proto.AnalyzePlanRequest.AnalyzeCase.IS_LOCAL) + .analyze(optimizedPlan, proto.AnalyzePlanRequest.AnalyzeCase.IS_LOCAL) .getIsLocal .getIsLocal @@ -253,7 +255,7 @@ class Dataset[T] private[sql] ( /** @inheritdoc */ def isStreaming: Boolean = sparkSession - .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.IS_STREAMING) + .analyze(optimizedPlan, proto.AnalyzePlanRequest.AnalyzeCase.IS_STREAMING) .getIsStreaming .getIsStreaming @@ -815,7 +817,7 @@ class Dataset[T] private[sql] ( protected def createTempView(viewName: String, replace: Boolean, global: Boolean): Unit = { val command = sparkSession.newCommand { builder => builder.getCreateDataframeViewBuilder - .setInput(plan.getRoot) + .setInput(optimizedPlan.getRoot) .setName(viewName) .setIsGlobal(global) .setReplace(replace) @@ -1054,7 +1056,7 @@ class Dataset[T] private[sql] ( /** @inheritdoc */ def inputFiles: Array[String] = sparkSession - .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.INPUT_FILES) + .analyze(optimizedPlan, proto.AnalyzePlanRequest.AnalyzeCase.INPUT_FILES) .getInputFiles .getFilesList .asScala @@ -1092,7 +1094,7 @@ class Dataset[T] private[sql] ( /** @inheritdoc */ def persist(): this.type = { sparkSession.analyze { builder => - builder.getPersistBuilder.setRelation(plan.getRoot) + builder.getPersistBuilder.setRelation(optimizedPlan.getRoot) } this } @@ -1101,7 +1103,7 @@ class Dataset[T] private[sql] ( def persist(newLevel: StorageLevel): this.type = { sparkSession.analyze { builder => builder.getPersistBuilder - .setRelation(plan.getRoot) + .setRelation(optimizedPlan.getRoot) .setStorageLevel(StorageLevelProtoConverter.toConnectProtoType(newLevel)) } this @@ -1111,7 +1113,7 @@ class Dataset[T] private[sql] ( def unpersist(blocking: Boolean): this.type = { sparkSession.analyze { builder => builder.getUnpersistBuilder - .setRelation(plan.getRoot) + .setRelation(optimizedPlan.getRoot) .setBlocking(blocking) } this @@ -1125,7 +1127,7 @@ class Dataset[T] private[sql] ( StorageLevelProtoConverter.toStorageLevel( sparkSession .analyze { builder => - builder.getGetStorageLevelBuilder.setRelation(plan.getRoot) + builder.getGetStorageLevelBuilder.setRelation(optimizedPlan.getRoot) } .getGetStorageLevel .getStorageLevel) @@ -1170,7 +1172,7 @@ class Dataset[T] private[sql] ( val checkpointBuilder = builder.getCheckpointCommandBuilder .setLocal(!reliableCheckpoint) .setEager(eager) - .setRelation(this.plan.getRoot) + .setRelation(this.optimizedPlan.getRoot) storageLevel.foreach { storageLevel => checkpointBuilder.setStorageLevel( StorageLevelProtoConverter.toConnectProtoType(storageLevel)) @@ -1197,13 +1199,13 @@ class Dataset[T] private[sql] ( /** @inheritdoc */ @DeveloperApi def sameSemantics(other: sql.Dataset[T]): Boolean = { - sparkSession.sameSemantics(this.plan, other.plan) + sparkSession.sameSemantics(this.optimizedPlan, other.optimizedPlan) } /** @inheritdoc */ @DeveloperApi def semanticHash(): Int = { - sparkSession.semanticHash(this.plan) + sparkSession.semanticHash(this.optimizedPlan) } /** @inheritdoc */ @@ -1212,10 +1214,10 @@ class Dataset[T] private[sql] ( } private[sql] def analyze: proto.AnalyzePlanResponse = { - sparkSession.analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.SCHEMA) + sparkSession.analyze(optimizedPlan, proto.AnalyzePlanRequest.AnalyzeCase.SCHEMA) } - def collectResult(): SparkResult[T] = sparkSession.execute(plan, agnosticEncoder) + def collectResult(): SparkResult[T] = sparkSession.execute(optimizedPlan, agnosticEncoder) private[sql] def withResult[E](f: SparkResult[T] => E): E = { val 
result = collectResult() diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/MergeIntoWriter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/MergeIntoWriter.scala index 66354e63ca8af..880c64aab7b9f 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/MergeIntoWriter.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/MergeIntoWriter.scala @@ -49,7 +49,7 @@ class MergeIntoWriter[T] private[sql] (table: String, ds: Dataset[T], on: Column private val builder = MergeIntoTableCommand .newBuilder() .setTargetTableName(table) - .setSourceTablePlan(ds.plan.getRoot) + .setSourceTablePlan(ds.optimizedPlan.getRoot) .setMergeCondition(toExpr(on)) /** diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala index 739b0318759e5..dda919e802d43 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/SparkSession.scala @@ -50,7 +50,7 @@ import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, RowEncoder} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{agnosticEncoderFor, BoxedLongEncoder, UnboundRowEncoder} import org.apache.spark.sql.connect.ColumnNodeToProtoConverter.toLiteral import org.apache.spark.sql.connect.ConnectConversions._ -import org.apache.spark.sql.connect.client.{ClassFinder, CloseableIterator, SparkConnectClient, SparkResult} +import org.apache.spark.sql.connect.client.{ClassFinder, CloseableIterator, PlanOptimizer, SparkConnectClient, SparkResult} import org.apache.spark.sql.connect.client.SparkConnectClient.Configuration import org.apache.spark.sql.connect.client.arrow.ArrowSerializer import org.apache.spark.sql.internal.{SessionState, SharedState, SqlApiConf, SubqueryExpression} @@ -85,6 +85,7 @@ class SparkSession private[sql] ( private[this] val allocator = new RootAllocator() private[sql] lazy val cleaner = new SessionCleaner(this) + private[sql] val optimizer = new PlanOptimizer(planIdGenerator) // a unique session ID for this session from client. private[sql] def sessionId: String = client.sessionId diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/PlanOptimizer.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/PlanOptimizer.scala new file mode 100644 index 0000000000000..72b404bb8beff --- /dev/null +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/PlanOptimizer.scala @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.connect.client + +import java.util.concurrent.atomic.AtomicLong + +import scala.collection.immutable.SeqMap +import scala.collection.mutable + +import org.apache.spark.connect.proto + +/** + * Optimizer for Spark Connect plans. This optimizer moves all duplicate subtrees from a query tree + * (Relation) into a top-level WithRelations node; the duplicates in the plan are replaced by + * references. This has several advantages: it reduces the number of nodes in the plan, it + * reduces the plan size, and it avoids redundant work on the server side (both during planning, and - + * if supported - analysis). + * + * This optimization assumes that nodes with the same plan_id are structurally equivalent. + * + * The optimization will retain all plan_ids in the input plan. This is needed because plan_ids can + * be referenced by UnresolvedAttribute, UnresolvedStar, UnresolvedRegex, and SubqueryExpression + * expressions. If the plan can be optimized, the new plan will contain an additional plan_id: the + * plan_id of the top-level WithRelations node. + * + * The current optimization uses a 2-pass approach. The first step identifies duplicate subtrees. + * This has a runtime and space complexity of O(num_unique_relations). The second step rewrites the + * plan. This has a runtime and space complexity of O(num_unique_relations). + * + * In theory this can be implemented as a single-pass algorithm by replacing duplicates with a + * reference once we identify them. This has two downsides: it requires that the client and the + * server have exactly the same traversal order, and it makes the plans much harder to read. + * + * @param nextPlanId generator for new plan_ids. + */ +class PlanOptimizer(nextPlanId: () => Long) { + def this(planIdGenerator: AtomicLong) = + this(() => planIdGenerator.incrementAndGet()) + + /** + * Optimize the given plan by deduplicating subtrees. + * + * @param plan + * The plan to optimize. + * @return + * The optimized plan with deduplicated subtrees. If the plan cannot be optimized, this returns + * the original plan. + */ + def optimize(plan: proto.Plan): proto.Plan = + PlanOptimizer.optimize(plan, nextPlanId) + + /** + * Optimize the given relation by deduplicating subtrees. + * + * @param relation + * The relation to optimize. + * @return + * The optimized relation with deduplicated subtrees. If the relation cannot be optimized, this + * returns the original relation. + */ + def optimize(relation: proto.Relation): proto.Relation = + PlanOptimizer.optimize(relation, nextPlanId) +} + +private[connect] object PlanOptimizer { + import RelationTreeUtils._ + + def optimize(plan: proto.Plan, nextPlanId: () => Long): proto.Plan = { + if (plan.hasRoot) { + val relation = plan.getRoot + val optimizedRelation = optimize(relation, nextPlanId) + if (optimizedRelation ne relation) { + plan.toBuilder.setRoot(optimizedRelation).build() + } else { + plan + } + } else { + plan + } + } + + def optimize(relation: proto.Relation, nextPlanId: () => Long): proto.Relation = { + val relations = analyze(relation) + if (relations.nonEmpty) { + rewriteRelation(relation, relations, nextPlanId) + } else { + relation + } + } + + /** + * Find all repeated (duplicate) query fragments in a query tree. + * + * @param root node of the query tree + * @return a map that contains all repeated query fragments, keyed by their plan id.
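+   *   For example, for a self-join such as `df.join(df)`, where both sides are the same
+   *   client-side Dataset and therefore share their plan_ids, the duplicated subtree is returned
+   *   once, keyed by the plan_id of its root; fragments that occur only once are not returned.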
+ */ + def analyze(root: proto.Relation): SeqMap[Long, proto.Relation] = { + // We can reduce memory consumption by using a bitset that tracks the planIds of nodes with a + // single occurrence. We only need to start tracking detailed information once there are + // multiple occurrences. For this we need a bitset that can deal with sparse planIds; there are + // libraries for this (e.g. RoaringBitMap), however that requires us to add a library to the + // Spark Connect client classpath which is something we need to trade off against overall size + // of that classpath. + val relationsMap = mutable.LinkedHashMap.empty[Long, RelationHolder] + visit(root) { + case relation @ PlanId(id) => + // Increase the stats for the plan id. If we have already seen the plan we will not + // visit its children, because we have already seen them before. + val holder = relationsMap.getOrElseUpdate(id, new RelationHolder(relation)) + holder.increaseNumOccurrences() == 1 + case _ => + // Always visit the subtree if there is no plan id. Its subtree might contain nodes we + // have not visited before. + true + } + + // Retain all relations that are duplicated. + relationsMap.to(SeqMap).collect { + case (id, holder) if holder.occurrences > 1 => + id -> holder.relation + } + } + + /** + * Rewrite the query tree using the map of reference relations. This transform moves all reference + * relations to a top-level WithRelations node, and replaces all instances of these relations with + * a reference. + * + * @param root relation to rewrite. + * @param referenceMap a map of relations that will be moved to the top-level withRelations node. + * @param nextPlanId function to generate the plan_id of the new root node. + * @return the rewritten plan. + */ + def rewriteRelation( + root: proto.Relation, + referenceMap: SeqMap[Long, proto.Relation], + nextPlanId: () => Long): proto.Relation = { + val builder = proto.Relation.newBuilder() + builder.getCommonBuilder.setPlanId(nextPlanId()) + val withRelationsBuilder = builder.getWithRelationsBuilder + val referencePlanIds = referenceMap.keySet + referenceMap.foreach { + case (id, reference) => + withRelationsBuilder.addReferences(rewriteSingleRelation( + reference, + referencePlanIds.filterNot(_ == id))) + } + withRelationsBuilder.setRoot(rewriteSingleRelation(root, referencePlanIds)) + builder.build() + } + + private def rewriteSingleRelation( + relation: proto.Relation, + referencePlanIds: Set[Long]): proto.Relation = transform(relation) { + case PlanId(id) if referencePlanIds(id) => + createReference(id) + case relation if relation.hasWithRelations => + // Rewrite the WithRelations node. We remove all reference plans if they are not a + // SubqueryAlias (the reference will be added to the top-level WithRelations node). We replace + // all references that are a SubqueryAlias with a reference. The latter is needed because + // WithRelations in combination with SubqueryAlias can be used to define named relations + // (like Common Table Expressions); names - unlike plan ids - are not unique. 
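+      // Concretely: a duplicated reference that is not a SubqueryAlias is dropped from this node,
+      // because its subtree is emitted exactly once under the new top-level WithRelations root; a
+      // duplicated SubqueryAlias reference is swapped for a plain reference so that the alias name
+      // remains declared in this scope while the aliased subtree moves to the top level.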
+ val withRelations = relation.getWithRelations + val builder = relation.toBuilder + val withRelationsBuilder = builder.getWithRelationsBuilder.clearReferences() + withRelations.getReferencesList.forEach { + case reference @ PlanId(id) if referencePlanIds(id) => + if (reference.hasSubqueryAlias) { + withRelationsBuilder.addReferences(createReference(id)) + } + case reference => + withRelationsBuilder.addReferences(reference) + } + builder.build() + } + + private def createReference(planId: Long): proto.Relation = { + // We don't set a plan id here because this is a reference to an existing plan. + proto.Relation.newBuilder().setReferencedPlanId(planId).build() + } + + object PlanId { + def apply(relation: proto.Relation): Long = unapply(relation).get + def get(relation: proto.Relation): Option[Long] = unapply(relation) + def unapply(relation: proto.Relation): Option[Long] = { + val common = relation.getCommon + if (common.hasPlanId) { + Some(common.getPlanId) + } else { + None + } + } + } + + private class RelationHolder(val relation: proto.Relation) { + private var numOccurrences = 0 + def occurrences: Int = numOccurrences + def increaseNumOccurrences(): Int = { + numOccurrences += 1 + numOccurrences + } + } +} + + diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/RelationTreeUtils.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/RelationTreeUtils.scala new file mode 100644 index 0000000000000..9a2bea7753d1b --- /dev/null +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/RelationTreeUtils.scala @@ -0,0 +1,491 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect.client + +import java.util + +import scala.jdk.CollectionConverters._ + +import com.google.protobuf.Descriptors.FieldDescriptor +import com.google.protobuf.Message + +import org.apache.spark.connect.proto +import org.apache.spark.util.SparkEnvUtils + +/** + * Utility functions for visiting and transforming relation trees (a.k.a. query trees). + * + * This implementation is efficient for known Relation/Message types. For unknown message types we + * use proto reflection.
+ */ +private[connect] object RelationTreeUtils { + + private val NO_INPUT_REL_TYPE_CASES = { + val typeCases = util.EnumSet.noneOf(classOf[proto.Relation.RelTypeCase]) + typeCases.add(proto.Relation.RelTypeCase.RELTYPE_NOT_SET) + typeCases.add(proto.Relation.RelTypeCase.READ) + typeCases.add(proto.Relation.RelTypeCase.LOCAL_RELATION) + typeCases.add(proto.Relation.RelTypeCase.CACHED_LOCAL_RELATION) + typeCases.add(proto.Relation.RelTypeCase.CACHED_REMOTE_RELATION) + typeCases.add(proto.Relation.RelTypeCase.SQL) + typeCases.add(proto.Relation.RelTypeCase.RANGE) + typeCases.add(proto.Relation.RelTypeCase.COMMON_INLINE_USER_DEFINED_TABLE_FUNCTION) + typeCases.add(proto.Relation.RelTypeCase.COMMON_INLINE_USER_DEFINED_DATA_SOURCE) + typeCases.add(proto.Relation.RelTypeCase.UNRESOLVED_TABLE_VALUED_FUNCTION) + typeCases.add(proto.Relation.RelTypeCase.REFERENCED_PLAN_ID) + typeCases.add(proto.Relation.RelTypeCase.UNKNOWN) + typeCases.add(proto.Relation.RelTypeCase.CATALOG) + typeCases.add(proto.Relation.RelTypeCase.EXTENSION) + typeCases + } + + def visit(relation: proto.Relation)(f: proto.Relation => Boolean): Unit = { + visit[Null](relation, null) { (current, _) => + (f(current), null) + } + } + + /** + * Visit all [[proto.Relation relations]] in a tree. + * + * @param relation root of the tree. + * @param f visit callback. The children of a relation will be visited when this function + * returns true. + */ + def visit[T]( + relation: proto.Relation, context: T)( + f: (proto.Relation, T) => (Boolean, T)): Unit = { + val messages = new util.ArrayDeque[(Message, T)] + messages.push(relation -> context) + while (!messages.isEmpty) { + val (message, context) = messages.pop() + visitSingleMessage( + message, + context, (m, c: T) => messages.push(m -> c), + f) + } + } + + private def visitSingleMessage[T]( + message: Message, + context: T, + addMessage: (Message, T) => Unit, + f: (proto.Relation, T) => (Boolean, T)): Unit = { + message match { + case relation: proto.Relation => + val (continue, newContext) = f(relation, context) + def addRelation(next: proto.Relation): Unit = { + if (next ne proto.Relation.getDefaultInstance) { + addMessage(next, newContext) + } + } + if (continue) { + // TODO check that this is compiled into a table switch... + relation.getRelTypeCase match { + // Relations without inputs. + case relTypeCase if NO_INPUT_REL_TYPE_CASES.contains(relTypeCase) => + + // Single input relations. 
+ case proto.Relation.RelTypeCase.PROJECT => + addRelation(relation.getProject.getInput) + case proto.Relation.RelTypeCase.FILTER => + addRelation(relation.getFilter.getInput) + case proto.Relation.RelTypeCase.SORT => + addRelation(relation.getSort.getInput) + case proto.Relation.RelTypeCase.LIMIT => + addRelation(relation.getLimit.getInput) + case proto.Relation.RelTypeCase.AGGREGATE => + addRelation(relation.getAggregate.getInput) + case proto.Relation.RelTypeCase.SAMPLE => + addRelation(relation.getSample.getInput) + case proto.Relation.RelTypeCase.OFFSET => + addRelation(relation.getOffset.getInput) + case proto.Relation.RelTypeCase.DEDUPLICATE => + addRelation(relation.getDeduplicate.getInput) + case proto.Relation.RelTypeCase.SUBQUERY_ALIAS => + addRelation(relation.getSubqueryAlias.getInput) + case proto.Relation.RelTypeCase.REPARTITION => + addRelation(relation.getRepartition.getInput) + case proto.Relation.RelTypeCase.TO_DF => + addRelation(relation.getToDf.getInput) + case proto.Relation.RelTypeCase.WITH_COLUMNS_RENAMED => + addRelation(relation.getWithColumnsRenamed.getInput) + case proto.Relation.RelTypeCase.SHOW_STRING => + addRelation(relation.getShowString.getInput) + case proto.Relation.RelTypeCase.DROP => + addRelation(relation.getDrop.getInput) + case proto.Relation.RelTypeCase.TAIL => + addRelation(relation.getTail.getInput) + case proto.Relation.RelTypeCase.WITH_COLUMNS => + addRelation(relation.getWithColumns.getInput) + case proto.Relation.RelTypeCase.HINT => + addRelation(relation.getHint.getInput) + case proto.Relation.RelTypeCase.UNPIVOT => + addRelation(relation.getUnpivot.getInput) + case proto.Relation.RelTypeCase.TO_SCHEMA => + addRelation(relation.getToSchema.getInput) + case proto.Relation.RelTypeCase.REPARTITION_BY_EXPRESSION => + addRelation(relation.getRepartitionByExpression.getInput) + case proto.Relation.RelTypeCase.MAP_PARTITIONS => + addRelation(relation.getMapPartitions.getInput) + case proto.Relation.RelTypeCase.COLLECT_METRICS => + addRelation(relation.getCollectMetrics.getInput) + case proto.Relation.RelTypeCase.PARSE => + addRelation(relation.getParse.getInput) + case proto.Relation.RelTypeCase.WITH_WATERMARK => + addRelation(relation.getWithWatermark.getInput) + case proto.Relation.RelTypeCase.APPLY_IN_PANDAS_WITH_STATE => + addRelation(relation.getApplyInPandasWithState.getInput) + case proto.Relation.RelTypeCase.HTML_STRING => + addRelation(relation.getHtmlString.getInput) + case proto.Relation.RelTypeCase.TRANSPOSE => + addRelation(relation.getTranspose.getInput) + case proto.Relation.RelTypeCase.FILL_NA => + addRelation(relation.getFillNa.getInput) + case proto.Relation.RelTypeCase.DROP_NA => + addRelation(relation.getDropNa.getInput) + case proto.Relation.RelTypeCase.REPLACE => + addRelation(relation.getReplace.getInput) + case proto.Relation.RelTypeCase.SUMMARY => + addRelation(relation.getSummary.getInput) + case proto.Relation.RelTypeCase.CROSSTAB => + addRelation(relation.getCrosstab.getInput) + case proto.Relation.RelTypeCase.DESCRIBE => + addRelation(relation.getDescribe.getInput) + case proto.Relation.RelTypeCase.COV => + addRelation(relation.getCov.getInput) + case proto.Relation.RelTypeCase.CORR => + addRelation(relation.getCorr.getInput) + case proto.Relation.RelTypeCase.APPROX_QUANTILE => + addRelation(relation.getApproxQuantile.getInput) + case proto.Relation.RelTypeCase.FREQ_ITEMS => + addRelation(relation.getFreqItems.getInput) + case proto.Relation.RelTypeCase.SAMPLE_BY => + addRelation(relation.getSampleBy.getInput) + + // 
Multi input relations + case proto.Relation.RelTypeCase.JOIN => + val join = relation.getJoin + addRelation(join.getLeft) + addRelation(join.getRight) + case proto.Relation.RelTypeCase.SET_OP => + val setOp = relation.getSetOp + addRelation(setOp.getLeftInput) + addRelation(setOp.getRightInput) + case proto.Relation.RelTypeCase.GROUP_MAP => + val groupMap = relation.getGroupMap + addRelation(groupMap.getInput) + addRelation(groupMap.getInitialInput) + case proto.Relation.RelTypeCase.CO_GROUP_MAP => + val coGroupMap = relation.getCoGroupMap + addRelation(coGroupMap.getInput) + addRelation(coGroupMap.getOther) + case proto.Relation.RelTypeCase.AS_OF_JOIN => + val asOfJoin = relation.getAsOfJoin + addRelation(asOfJoin.getLeft) + addRelation(asOfJoin.getRight) + case proto.Relation.RelTypeCase.WITH_RELATIONS => + val withRelations = relation.getWithRelations + withRelations.getReferencesList.forEach(addRelation(_)) + addRelation(withRelations.getRoot) + case proto.Relation.RelTypeCase.LATERAL_JOIN => + val lateralJoin = relation.getLateralJoin + addRelation(lateralJoin.getLeft) + addRelation(lateralJoin.getRight) + case proto.Relation.RelTypeCase.ML_RELATION => + val mlRelation = relation.getMlRelation + if (mlRelation.hasTransform) { + addRelation(mlRelation.getTransform.getInput) + } else if (mlRelation.hasFetch) { + mlRelation.getFetch.getMethodsList.forEach { method => + method.getArgsList.forEach { args => + if (args.hasInput) { + addRelation(args.getInput) + } + } + } + } + addRelation(mlRelation.getModelSummaryDataset) + + // Unhandled relation type. Fall back to proto reflection. + case relTypeCase => + assert(!SparkEnvUtils.isTesting, + "Unhandled relTypeCase: " + relTypeCase) + val descriptor = relation.getDescriptorForType + .findFieldByNumber(relTypeCase.getNumber) + if (descriptor != null && descriptor.getType == FieldDescriptor.Type.MESSAGE) { + addMessage(relation.getField(descriptor).asInstanceOf[Message], newContext) + } + } + } + + case message => + // Unknown message. Fall back to proto reflection. + assert(!SparkEnvUtils.isTesting, + "Unhandled Message type: " + message.getDescriptorForType.getName) + message.getAllFields.forEach { (desc, value) => + if (desc.getType == FieldDescriptor.Type.MESSAGE) { + value match { + case list: util.List[Message @unchecked] => + list.forEach(addMessage(_, context)) + case message: Message => + addMessage(message, context) + } + } + } + } + } + + /** + * Recursively transform a [[proto.Relation relation]]. + * + * @param message entry point. + * @param pf transformation to apply to all relations. + * @tparam M type of the current message. + * @return the transformed relation. + */ + private[connect] def transform[M <: Message]( + message: M)( + pf: PartialFunction[proto.Relation, proto.Relation]): M = { + def transformRelation(relation: proto.Relation, set: proto.Relation => Any): Unit = { + if (relation ne proto.Relation.getDefaultInstance) { + set(transform(relation)(pf)) + } + } + def transformMessage(value: Message, fd: FieldDescriptor, builder: Message.Builder): Unit = { + builder.setField(fd, transform(value)(pf)) + } + def result(builder: Message.Builder): M = { + val result = builder.build() + if (result == message) { + message + } else { + result.asInstanceOf[M] + } + } + + message match { + case relation: proto.Relation => + val transformed = pf.applyOrElse(relation, identity[proto.Relation]) + val builder = transformed.toBuilder + // Transform input relations. + builder.getRelTypeCase match { + // Relations without inputs. 
+ case relTypeCase if NO_INPUT_REL_TYPE_CASES.contains(relTypeCase) => + + // Single input relations + case proto.Relation.RelTypeCase.PROJECT => + val typeCaseBuilder = builder.getProjectBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.FILTER => + val typeCaseBuilder = builder.getFilterBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.SORT => + val typeCaseBuilder = builder.getSortBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.LIMIT => + val typeCaseBuilder = builder.getLimitBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.AGGREGATE => + val typeCaseBuilder = builder.getAggregateBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.SAMPLE => + val typeCaseBuilder = builder.getSampleBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.OFFSET => + val typeCaseBuilder = builder.getOffsetBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.DEDUPLICATE => + val typeCaseBuilder = builder.getDeduplicateBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.SUBQUERY_ALIAS => + val typeCaseBuilder = builder.getSubqueryAliasBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.REPARTITION => + val typeCaseBuilder = builder.getRepartitionBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.TO_DF => + val typeCaseBuilder = builder.getToDfBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.WITH_COLUMNS_RENAMED => + val typeCaseBuilder = builder.getWithColumnsRenamedBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.SHOW_STRING => + val typeCaseBuilder = builder.getShowStringBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.DROP => + val typeCaseBuilder = builder.getDropBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.TAIL => + val typeCaseBuilder = builder.getTailBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.WITH_COLUMNS => + val typeCaseBuilder = builder.getWithColumnsBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.HINT => + val typeCaseBuilder = builder.getHintBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.UNPIVOT => + val typeCaseBuilder = builder.getUnpivotBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.TO_SCHEMA => + val typeCaseBuilder = builder.getToSchemaBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.REPARTITION_BY_EXPRESSION => + val typeCaseBuilder = builder.getRepartitionByExpressionBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case 
proto.Relation.RelTypeCase.MAP_PARTITIONS => + val typeCaseBuilder = builder.getMapPartitionsBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.COLLECT_METRICS => + val typeCaseBuilder = builder.getCollectMetricsBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.PARSE => + val typeCaseBuilder = builder.getParseBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.WITH_WATERMARK => + val typeCaseBuilder = builder.getWithWatermarkBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.APPLY_IN_PANDAS_WITH_STATE => + val typeCaseBuilder = builder.getApplyInPandasWithStateBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.HTML_STRING => + val typeCaseBuilder = builder.getHtmlStringBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.TRANSPOSE => + val typeCaseBuilder = builder.getTransposeBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.FILL_NA => + val typeCaseBuilder = builder.getFillNaBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.DROP_NA => + val typeCaseBuilder = builder.getDropNaBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.REPLACE => + val typeCaseBuilder = builder.getReplaceBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.SUMMARY => + val typeCaseBuilder = builder.getSummaryBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.CROSSTAB => + val typeCaseBuilder = builder.getCrosstabBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.DESCRIBE => + val typeCaseBuilder = builder.getDescribeBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.COV => + val typeCaseBuilder = builder.getCovBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.CORR => + val typeCaseBuilder = builder.getCorrBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.APPROX_QUANTILE => + val typeCaseBuilder = builder.getApproxQuantileBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.FREQ_ITEMS => + val typeCaseBuilder = builder.getFreqItemsBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + case proto.Relation.RelTypeCase.SAMPLE_BY => + val typeCaseBuilder = builder.getSampleByBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + + // Multi-input relations. 
+ case proto.Relation.RelTypeCase.JOIN => + val typeCaseBuilder = builder.getJoinBuilder + transformRelation(typeCaseBuilder.getLeft, typeCaseBuilder.setLeft) + transformRelation(typeCaseBuilder.getRight, typeCaseBuilder.setRight) + case proto.Relation.RelTypeCase.SET_OP => + val typeCaseBuilder = builder.getSetOpBuilder + transformRelation(typeCaseBuilder.getLeftInput, typeCaseBuilder.setLeftInput) + transformRelation(typeCaseBuilder.getRightInput, typeCaseBuilder.setRightInput) + case proto.Relation.RelTypeCase.GROUP_MAP => + val typeCaseBuilder = builder.getGroupMapBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + transformRelation(typeCaseBuilder.getInitialInput, typeCaseBuilder.setInitialInput) + case proto.Relation.RelTypeCase.CO_GROUP_MAP => + val typeCaseBuilder = builder.getCoGroupMapBuilder + transformRelation(typeCaseBuilder.getInput, typeCaseBuilder.setInput) + transformRelation(typeCaseBuilder.getOther, typeCaseBuilder.setOther) + case proto.Relation.RelTypeCase.AS_OF_JOIN => + val typeCaseBuilder = builder.getAsOfJoinBuilder + transformRelation(typeCaseBuilder.getLeft, typeCaseBuilder.setLeft) + transformRelation(typeCaseBuilder.getRight, typeCaseBuilder.setRight) + case proto.Relation.RelTypeCase.WITH_RELATIONS => + val typeCaseBuilder = builder.getWithRelationsBuilder + (0 until typeCaseBuilder.getReferencesCount).foreach { i => + transformRelation( + typeCaseBuilder.getReferences(i), + typeCaseBuilder.setReferences(i, _)) + } + transformRelation(typeCaseBuilder.getRoot, typeCaseBuilder.setRoot) + case proto.Relation.RelTypeCase.LATERAL_JOIN => + val typeCaseBuilder = builder.getLateralJoinBuilder + transformRelation(typeCaseBuilder.getLeft, typeCaseBuilder.setLeft) + transformRelation(typeCaseBuilder.getRight, typeCaseBuilder.setRight) + case proto.Relation.RelTypeCase.ML_RELATION => + val typeCaseBuilder = builder.getMlRelationBuilder + if (typeCaseBuilder.hasTransform) { + val transformBuilder = typeCaseBuilder.getTransformBuilder + transformRelation(transformBuilder.getInput, transformBuilder.setInput) + } else if (typeCaseBuilder.hasFetch) { + val fetchBuilder = typeCaseBuilder.getFetchBuilder + (0 until fetchBuilder.getMethodsCount).foreach { i => + val methodBuilder = fetchBuilder.getMethodsBuilder(i) + (0 until methodBuilder.getArgsCount).foreach { j => + val argsBuilder = methodBuilder.getArgsBuilder(j) + if (argsBuilder.hasInput) { + transformRelation(argsBuilder.getInput, argsBuilder.setInput) + } + } + } + } + transformRelation( + typeCaseBuilder.getModelSummaryDataset, + typeCaseBuilder.setModelSummaryDataset) + + // Unhandled relation type. Fall back to proto reflection. + case relTypeCase => + assert(!SparkEnvUtils.isTesting) + val descriptor = builder.getDescriptorForType + .findFieldByNumber(relTypeCase.getNumber) + if (descriptor != null && descriptor.getType == FieldDescriptor.Type.MESSAGE) { + val value = builder.getField(descriptor).asInstanceOf[Message] + transformMessage(value, descriptor, builder) + } + } + result(builder) + + case message => + // Unknown message type. Fall back to proto reflection. 
+ val builder = message.toBuilder + message.getAllFields.forEach { (desc, value) => + if (desc.getType == FieldDescriptor.Type.MESSAGE) { + value match { + case list: util.List[Message @unchecked] => + list.asScala.zipWithIndex.foreach { + case (element, i) => + builder.setRepeatedField(desc, i, transform(element)(pf)) + } + case item: Message => + transformMessage(item, desc, builder) + } + } + } + result(builder) + } + } +} diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index bcd643a30253f..1b44ee2644c69 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.connect.planner -import java.util.Properties -import java.util.UUID +import java.util.{Properties, UUID} import scala.collection.mutable import scala.jdk.CollectionConverters._ @@ -45,24 +44,23 @@ import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, SESSION_ID} import org.apache.spark.resource.{ExecutorResourceRequest, ResourceProfile, TaskResourceProfile, TaskResourceRequest} import org.apache.spark.sql.{Column, Encoders, ForeachWriter, Observation, Row} -import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier, QueryPlanningTracker} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, GlobalTempView, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedOrdinal, UnresolvedPlanId, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedStarWithColumns, UnresolvedStarWithColumnsRenames, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose} +import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, GlobalTempView, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedOrdinal, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedStarWithColumns, UnresolvedStarWithColumnsRenames, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose} import org.apache.spark.sql.catalyst.encoders.{encoderFor, AgnosticEncoder, ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ProductEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, UnboundRowEncoder} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.parser.{ParseException, ParserUtils} -import org.apache.spark.sql.catalyst.plans.{Cross, FullOuter, Inner, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter, UsingJoin} -import org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.plans.logical.{AppendColumns, Assignment, CoGroup, CollectMetrics, CommandResult, CompoundBody, Deduplicate, DeduplicateWithinWatermark, DeleteAction, DeserializeToObject, Except, 
FlatMapGroupsWithState, InsertAction, InsertStarAction, Intersect, JoinWith, LocalRelation, LogicalGroupState, LogicalPlan, MapGroups, MapPartitions, MergeAction, Project, Sample, SerializeFromObject, Sort, SubqueryAlias, TimeModes, TransformWithState, TypedFilter, Union, Unpivot, UnresolvedHint, UpdateAction, UpdateEventTimeWatermarkColumn, UpdateStarAction} +import org.apache.spark.sql.catalyst.plans.{logical, Cross, FullOuter, Inner, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter, UsingJoin} +import org.apache.spark.sql.catalyst.plans.logical.{AppendColumns, Assignment, CoGroup, CollectMetrics, CommandResult, CompoundBody, Deduplicate, DeduplicateWithinWatermark, DeleteAction, DeserializeToObject, Except, FlatMapGroupsWithState, InsertAction, InsertStarAction, Intersect, JoinWith, LocalRelation, LogicalGroupState, LogicalPlan, MapGroups, MapPartitions, MergeAction, Project, Sample, SerializeFromObject, Sort, SubqueryAlias, TimeModes, TransformWithState, TypedFilter, Union, Unpivot, UnresolvedHint, UnresolvedWith, UpdateAction, UpdateEventTimeWatermarkColumn, UpdateStarAction} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes -import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin, TreePattern} +import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils} import org.apache.spark.sql.classic.{Catalog, Dataset, MergeIntoWriter, RelationalGroupedDataset, SparkSession, TypedAggUtils, UserDefinedFunctionUtils} import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.connect.client.arrow.ArrowSerializer -import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, ForeachWriterPacket, LiteralValueProtoConverter, StorageLevelProtoConverter, StreamingListenerPacket, UdfPacket} +import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, ForeachWriterPacket, InvalidPlanInput, LiteralValueProtoConverter, StorageLevelProtoConverter, StreamingListenerPacket, UdfPacket} import org.apache.spark.sql.connect.config.Connect.CONNECT_GRPC_ARROW_MAX_BATCH_SIZE import org.apache.spark.sql.connect.ml.MLHandler import org.apache.spark.sql.connect.pipelines.PipelinesHandler @@ -120,6 +118,24 @@ class SparkConnectPlanner( private lazy val pythonExec = sys.env.getOrElse("PYSPARK_PYTHON", sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", "python3")) + private val RELATION_COMPUTATION_IN_PROGRESS = new AnyRef + private val relationCache = mutable.Map.empty[Long, AnyRef] + + private def getCachedRelation(planId: Long): LogicalPlan = { + relationCache.get(planId) match { + case Some(plan: LogicalPlan) => plan + case Some(RELATION_COMPUTATION_IN_PROGRESS) => + throw InvalidPlanInput(s"Cyclic plan reference for plan ID: $planId") + case Some(relation: proto.Relation) => + relationCache.put(planId, RELATION_COMPUTATION_IN_PROGRESS) + val plan = transformRelation(relation) + relationCache.update(planId, plan) + plan + case _ => + throw InvalidInputErrors.invalidWithRelationReference() + } + } + /** * The root of the query plan is a relation and we apply the transformations to it. The resolved * logical plan will not get cached. 
If the result needs to be cached, use @@ -221,6 +237,8 @@ class SparkConnectPlanner( case proto.Relation.RelTypeCase.COLLECT_METRICS => transformCollectMetrics(rel.getCollectMetrics, rel.getCommon.getPlanId) case proto.Relation.RelTypeCase.PARSE => transformParse(rel.getParse) + case proto.Relation.RelTypeCase.REFERENCED_PLAN_ID => + getCachedRelation(rel.getReferencedPlanId) // Catalog API (internal-only) case proto.Relation.RelTypeCase.CATALOG => transformCatalog(rel.getCatalog) @@ -344,13 +362,6 @@ class SparkConnectPlanner( } } - private def transformSqlWithRefs(query: proto.WithRelations): LogicalPlan = { - if (!isValidSQLWithRefs(query)) { - throw InvalidInputErrors.invalidSQLWithReferences(query) - } - executeSQLWithRefs(query).logicalPlan - } - private def transformSubqueryAlias(alias: proto.SubqueryAlias): LogicalPlan = { val aliasIdentifier = if (alias.getQualifierCount > 0) { @@ -2793,14 +2804,12 @@ class SparkConnectPlanner( .build() } - val df = relation.getRelTypeCase match { - case proto.Relation.RelTypeCase.SQL => - executeSQL(relation.getSql, tracker) - case proto.Relation.RelTypeCase.WITH_RELATIONS => - executeSQLWithRefs(relation.getWithRelations, tracker) - case other => - throw InvalidInputErrors.sqlCommandExpectsSqlOrWithRelations(other) + // Only allow a SQL relation or a SQL relation nested in a WithRelations relation. + if (!relation.hasSql && + !(relation.hasWithRelations && relation.getWithRelations.getRoot.hasSql)) { + throw InvalidInputErrors.sqlCommandExpectsSqlOrWithRelations(relation.getRelTypeCase) } + val df = Dataset.ofRows(session, transformRelation(relation), tracker) // Check if command or SQL Script has been executed. val isCommand = df.queryExecution.commandExecuted.isInstanceOf[CommandResult] @@ -2881,79 +2890,6 @@ class SparkConnectPlanner( } } - private def isValidSQLWithRefs(query: proto.WithRelations): Boolean = { - query.getRoot.getRelTypeCase match { - case proto.Relation.RelTypeCase.SQL => - case _ => return false - } - if (query.getReferencesCount == 0) { - return false - } - query.getReferencesList.iterator().asScala.foreach { ref => - ref.getRelTypeCase match { - case proto.Relation.RelTypeCase.SUBQUERY_ALIAS => - case _ => return false - } - } - true - } - - private def executeSQLWithRefs( - query: proto.WithRelations, - tracker: QueryPlanningTracker = new QueryPlanningTracker) = { - if (!isValidSQLWithRefs(query)) { - throw InvalidInputErrors.invalidSQLWithReferences(query) - } - - // Eagerly execute commands of the provided SQL string, with given references. - val sql = query.getRoot.getSql - this.synchronized { - try { - query.getReferencesList.asScala.foreach { ref => - Dataset - .ofRows(session, transformRelation(ref.getSubqueryAlias.getInput)) - .createOrReplaceTempView(ref.getSubqueryAlias.getAlias) - } - executeSQL(sql, tracker) - } finally { - // drop all temporary views - query.getReferencesList.asScala.foreach { ref => - session.catalog.dropTempView(ref.getSubqueryAlias.getAlias) - } - } - } - } - - private def executeSQL( - sql: proto.SQL, - tracker: QueryPlanningTracker = new QueryPlanningTracker) = { - // Eagerly execute commands of the provided SQL string. 
- val args = sql.getArgsMap - val namedArguments = sql.getNamedArgumentsMap - val posArgs = sql.getPosArgsList - val posArguments = sql.getPosArgumentsList - if (!namedArguments.isEmpty) { - session.sql( - sql.getQuery, - namedArguments.asScala.toMap.transform((_, e) => Column(transformExpression(e))), - tracker) - } else if (!posArguments.isEmpty) { - session.sql( - sql.getQuery, - posArguments.asScala.map(e => Column(transformExpression(e))).toArray, - tracker) - } else if (!args.isEmpty) { - session.sql( - sql.getQuery, - args.asScala.toMap.transform((_, v) => transformLiteral(v)), - tracker) - } else if (!posArgs.isEmpty) { - session.sql(sql.getQuery, posArgs.asScala.map(transformLiteral).toArray, tracker) - } else { - session.sql(sql.getQuery, Map.empty[String, Any], tracker) - } - } - private def handleRegisterUserDefinedFunction( fun: proto.CommonInlineUserDefinedFunction): Unit = { fun.getFunctionCase match { @@ -4057,72 +3993,73 @@ class SparkConnectPlanner( private def transformSubqueryExpression( getSubqueryExpression: proto.SubqueryExpression): Expression = { - val planId = getSubqueryExpression.getPlanId + val plan = getCachedRelation(getSubqueryExpression.getPlanId) getSubqueryExpression.getSubqueryType match { case proto.SubqueryExpression.SubqueryType.SUBQUERY_TYPE_SCALAR => - UnresolvedScalarSubqueryPlanId(planId) + ScalarSubquery(plan) case proto.SubqueryExpression.SubqueryType.SUBQUERY_TYPE_EXISTS => - UnresolvedExistsPlanId(planId) + Exists(plan) case proto.SubqueryExpression.SubqueryType.SUBQUERY_TYPE_TABLE_ARG => if (getSubqueryExpression.hasTableArgOptions) { val options = getSubqueryExpression.getTableArgOptions - UnresolvedTableArgPlanId( - planId, - partitionSpec = options.getPartitionSpecList.asScala + FunctionTableSubqueryArgumentExpression( + plan, + partitionByExpressions = options.getPartitionSpecList.asScala .map(transformExpression) .toSeq, - orderSpec = options.getOrderSpecList.asScala + orderByExpressions = options.getOrderSpecList.asScala .map(transformSortOrder) .toSeq, withSinglePartition = options.hasWithSinglePartition && options.getWithSinglePartition) } else { - UnresolvedTableArgPlanId(planId) + FunctionTableSubqueryArgumentExpression(plan) } case proto.SubqueryExpression.SubqueryType.SUBQUERY_TYPE_IN => - UnresolvedInSubqueryPlanId( - getSubqueryExpression.getInSubqueryValuesList.asScala.map { value => - transformExpression(value) - }.toSeq, - planId) + val values = getSubqueryExpression.getInSubqueryValuesList.asScala.map { value => + transformExpression(value) + }.toSeq + InSubquery(values, ListQuery(plan)) case other => throw InvalidInputErrors.invalidEnum(other) } } private def transformWithRelations(getWithRelations: proto.WithRelations): LogicalPlan = { - if (isValidSQLWithRefs(getWithRelations)) { - transformSqlWithRefs(getWithRelations) + // Register the plans in the relation cache, so they can be resolved while + // transforming the root relation into a LogicalPlan. 
+ val namedReferences = mutable.Buffer.empty[(String, proto.Relation)] + getWithRelations.getReferencesList.forEach { ref => + val common = ref.getCommon + if (!common.hasPlanId && !ref.hasSubqueryAlias) { + throw InvalidInputErrors.invalidWithRelationReference() + } + if (common.hasPlanId) { + relationCache.put(common.getPlanId, ref) + } + if (ref.hasSubqueryAlias) { + namedReferences += ref.getSubqueryAlias.getAlias -> ref + } + } + + val root = transformRelation(getWithRelations.getRoot) + if (namedReferences.nonEmpty) { + // If WithRelations contains named references we create a CTE. This is needed because it is + // allowed to nest WithRelations nodes and names used in a parent node can be reused + // (overwritten) by a child node. + val ctes = namedReferences.map { + case (name, relation) => + assert(relation.hasSubqueryAlias) + val plan = if (relation.getCommon.hasPlanId) { + getCachedRelation(relation.getCommon.getPlanId) + } else { + transformRelation(relation) + } + (name, plan.asInstanceOf[SubqueryAlias], None) + } + UnresolvedWith(root, ctes.toSeq) } else { // Wrap the plan to keep the original planId. - val plan = Project(Seq(UnresolvedStar(None)), transformRelation(getWithRelations.getRoot)) - - val relations = getWithRelations.getReferencesList.asScala.map { ref => - if (ref.hasCommon && ref.getCommon.hasPlanId) { - val planId = ref.getCommon.getPlanId - val plan = transformRelation(ref) - planId -> plan - } else { - throw InvalidInputErrors.invalidWithRelationReference() - } - }.toMap - - val missingPlanIds = mutable.Set.empty[Long] - val withRelations = plan - .transformAllExpressionsWithPruning(_.containsPattern(TreePattern.UNRESOLVED_PLAN_ID)) { - case u: UnresolvedPlanId => - if (relations.contains(u.planId)) { - u.withPlan(relations(u.planId)) - } else { - missingPlanIds += u.planId - u - } - } - assertPlan( - missingPlanIds.isEmpty, - "Missing relation in WithRelations: " + - s"${missingPlanIds.mkString("(", ", ", ")")} not in " + - s"${relations.keys.mkString("(", ", ", ")")}") - withRelations + Project(Seq(UnresolvedStar(None)), root) } }
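Note for reviewers: below is a minimal, illustrative sketch (not part of this patch) of how the client-side deduplication introduced above is expected to behave on a self-join. The table name "t", the explicit plan ids, the AtomicLong seed, and the object name PlanOptimizerExample are assumptions made for the example; real plans get their plan ids assigned by the client, and the builder calls used are the standard protobuf-generated API for spark/connect/relations.proto.

import java.util.concurrent.atomic.AtomicLong

import org.apache.spark.connect.proto
import org.apache.spark.sql.connect.client.PlanOptimizer

object PlanOptimizerExample {
  // Stamp a plan id on a relation builder, mimicking what the client does for every Dataset.
  private def withPlanId(builder: proto.Relation.Builder, id: Long): proto.Relation = {
    builder.getCommonBuilder.setPlanId(id)
    builder.build()
  }

  def main(args: Array[String]): Unit = {
    // A scan of table "t" (plan id 1) that is used twice in a self-join (plan id 2).
    val readBuilder = proto.Relation.newBuilder()
    readBuilder.getReadBuilder.getNamedTableBuilder.setUnparsedIdentifier("t")
    val read = withPlanId(readBuilder, 1L)

    val joinBuilder = proto.Relation.newBuilder()
    joinBuilder.getJoinBuilder
      .setLeft(read)
      .setRight(read)
      .setJoinType(proto.Join.JoinType.JOIN_TYPE_INNER)
    val join = withPlanId(joinBuilder, 2L)

    val optimizer = new PlanOptimizer(new AtomicLong(100L))
    val optimized = optimizer.optimize(join)

    // Expected shape: a new top-level WithRelations node (with a fresh plan id) holding the scan
    // exactly once, while both join inputs are rewritten to referenced_plan_id = 1.
    assert(optimized.hasWithRelations)
    val withRelations = optimized.getWithRelations
    assert(withRelations.getReferencesCount == 1)
    assert(withRelations.getReferences(0).getCommon.getPlanId == 1L)
    val rewrittenJoin = withRelations.getRoot.getJoin
    assert(rewrittenJoin.getLeft.getReferencedPlanId == 1L)
    assert(rewrittenJoin.getRight.getReferencedPlanId == 1L)
  }
}

If the plan has this shape, the server resolves each referenced_plan_id through the relationCache added to SparkConnectPlanner, so the shared subtree is only transformed into a LogicalPlan once.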