Skip to content

Commit c6ee6a8

Browse files
committed
proto changes
1 parent 09efee7 commit c6ee6a8

File tree

2 files changed

+73
-5
lines changed

2 files changed

+73
-5
lines changed

sql/connect/common/src/main/protobuf/spark/connect/base.proto

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,19 @@ message Plan {
4343
}
4444
}
4545

46-
47-
4846
// User Context is used to refer to one particular user session that is executing
4947
// queries in the backend.
5048
message UserContext {
5149
string user_id = 1;
5250
string user_name = 2;
5351

52+
// (Optional) Should be non-null for RPCs that are sent during the execution of a Declarative
53+
// Pipelines flow query function. Identifies the flow and the dataflow graph that it's a part of.
54+
// Any plans that are analyzed within the RPC are analyzed "relative to" the dataflow graph.
55+
// I.e., when determining the existence and schema of a data source that's defined in the graph,
56+
// the definition from the graph is used instead of the definition in physical storage.
57+
optional FlowAnalysisContext pipeline_flow_analysis_context = 3;
58+
5459
// To extend the existing user context message that is used to identify incoming requests,
5560
// Spark Connect leverages the Any protobuf type that can be used to inject arbitrary other
5661
// messages into this message. Extensions are stored as a `repeated` type to be able to
@@ -406,6 +411,10 @@ message ExecutePlanResponse {
406411
// Pipeline command response
407412
PipelineCommandResult pipeline_command_result = 22;
408413

414+
// A signal from the server to the client to execute the query function for a flow, and to
415+
// register its result with the server.
416+
PipelineQueryFunctionExecutionSignal pipeline_query_function_execution_signal = 23;
417+
409418
// Support arbitrary result objects.
410419
google.protobuf.Any extension = 999;
411420
}

sql/connect/common/src/main/protobuf/spark/connect/pipelines.proto

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,24 @@ message PipelineCommand {
9090
optional string format = 8;
9191
}
9292

93+
// Metadata about why a query function failed to execute successfully.
94+
message QueryFunctionFailure {
95+
// Identifier for a dataset within the graph whose schema the query function needed
96+
// but which had not yet been analyzed itself.
97+
optional string missing_dependency = 1;
98+
}
99+
100+
// The result of executing a user-defined query function.
101+
message QueryFunctionResult {
102+
oneof flow_function_evaluation_result {
103+
// If the query function executed successfully, the unresolved logical plan produced by it.
104+
spark.connect.Relation plan = 1;
105+
106+
// If the query function failed, metadata about the failure.
107+
QueryFunctionFailure failure = 2;
108+
}
109+
}
110+
93111
// Request to define a flow targeting a dataset.
94112
message DefineFlow {
95113
// The graph to attach this flow to.
@@ -101,14 +119,18 @@ message PipelineCommand {
101119
// Name of the dataset this flow writes to. Can be partially or fully qualified.
102120
optional string target_dataset_name = 3;
103121

104-
// An unresolved relation that defines the dataset's flow.
105-
optional spark.connect.Relation relation = 4;
122+
// The result of executing the flow's query function.
123+
optional QueryFunctionResult result = 4;
106124

107125
// SQL configurations set when running this flow.
108126
map<string, string> sql_conf = 5;
109127

110128
// If true, this flow will only be run once per full refresh.
111129
optional bool once = 6;
130+
131+
// Identifier for the client making the request. The server uses this to determine what flow
132+
// evaluation request stream to dispatch evaluation requests to for this flow.
133+
optional string client_id = 7;
112134
}
113135

114136
// Resolves all datasets and flows and start a pipeline update. Should be called after all
@@ -129,8 +151,29 @@ message PipelineCommand {
129151
// The contents of the SQL file.
130152
optional string sql_text = 3;
131153
}
132-
}
133154

155+
// Request to get the stream of query function execution signals for a graph.
156+
message GetQueryFunctionExecutionSignalStream {
157+
// The graph to get the query function execution signal stream for.
158+
optional string dataflow_graph_id = 1;
159+
160+
// Identifier for the client that is requesting the stream.
161+
optional string client_id = 2;
162+
}
163+
164+
// Request from the client to update the flow's query function result
165+
// for a previously unanalyzed flow.
166+
message DefineFlowQueryFunctionResult {
167+
// The fully qualified name of the flow being updated.
168+
optional string flow_name = 1;
169+
170+
// The ID of the graph this flow belongs to.
171+
optional string dataflow_graph_id = 2;
172+
173+
// The result of executing the flow's query function.
174+
optional QueryFunctionResult result = 3;
175+
}
176+
}
134177

135178
// Dispatch object for pipelines command results.
136179
message PipelineCommandResult {
@@ -166,3 +209,19 @@ message PipelineEvent {
166209
// The message that should be displayed to users.
167210
optional string message = 2;
168211
}
212+
213+
// A signal from the server to the client to execute the query function for a flow, and to register
214+
// its result with the server.
215+
message PipelineQueryFunctionExecutionSignal {
216+
optional string flow_name = 1;
217+
}
218+
219+
// Context describing the flow being resolved within a graph.
220+
message FlowAnalysisContext {
221+
// (Required) The ID of the dataflow graph that the flow belongs to. I.e. the value returned by
222+
// CreateDataflowGraph.
223+
string dataflow_graph_id = 1;
224+
225+
// (Required) The name of the flow within the dataflow graph.
226+
string flow_name = 2;
227+
}

0 commit comments

Comments
 (0)