Skip to content

Commit 90a077f

Browse files
committed
proto changes
1 parent 09efee7 commit 90a077f

File tree

2 files changed

+72
-4
lines changed

2 files changed

+72
-4
lines changed

sql/connect/common/src/main/protobuf/spark/connect/base.proto

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,29 @@ message Plan {
4343
}
4444
}
4545

46-
46+
// Composite identifier for referencing a flow within a dataflow graph.
47+
message DataflowGraphFlowIdentifier {
48+
// (Required) The id of the dataflow graph that this flow belongs to. I.e. the value returned by
49+
// CreateDataflowGraph.
50+
string dataflow_graph_id = 1;
51+
52+
// (Required) The name of the flow within the dataflow graph.
53+
string flow_name = 2;
54+
}
4755

4856
// User Context is used to refer to one particular user session that is executing
4957
// queries in the backend.
5058
message UserContext {
5159
string user_id = 1;
5260
string user_name = 2;
5361

62+
// (Optional) Should be non-null for RPCs that are sent during the execution of a Declarative
63+
// Pipelines flow query function. Identifies the flow and the dataflow graph that it's a part of.
64+
// Any plans that are analyzed within the RPC are analyzed "relative to" the dataflow graph.
65+
// "Relative to" means that, when determining the existence and schema of a table that's defined
66+
// in the graph, the definitions from the graph are used instead of the definitions from the catalog.
67+
optional DataflowGraphFlowIdentifier dataflow_graph_flow_identifier = 3;
68+
5469
// To extend the existing user context message that is used to identify incoming requests,
5570
// Spark Connect leverages the Any protobuf type that can be used to inject arbitrary other
5671
// messages into this message. Extensions are stored as a `repeated` type to be able to
@@ -406,6 +421,10 @@ message ExecutePlanResponse {
406421
// Pipeline command response
407422
PipelineCommandResult pipeline_command_result = 22;
408423

424+
// A signal from the server to the client to execute the query function for a flow, and to
425+
// register its result with the server.
426+
PipelineQueryFunctionExecutionSignal pipeline_query_function_execution_signal = 23;
427+
409428
// Support arbitrary result objects.
410429
google.protobuf.Any extension = 999;
411430
}

sql/connect/common/src/main/protobuf/spark/connect/pipelines.proto

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,24 @@ message PipelineCommand {
9090
optional string format = 8;
9191
}
9292

93+
// Metadata about why a query function failed to be executed successfully.
94+
message QueryFunctionFailure {
95+
// Identifier for a dataset within the graph that the query function needed to know the schema
96+
// of, but which had not yet been analyzed itself.
97+
optional string missing_dependency = 1;
98+
}
99+
100+
// The result of executing a user-defined query function.
101+
message QueryFunctionResult {
102+
oneof flow_function_evaluation_result {
103+
// If the query function executed successfully, the unresolved logical plan produced by it.
104+
spark.connect.Relation plan = 1;
105+
106+
// If the query function failed, metadata about the failure.
107+
QueryFunctionFailure failure = 2;
108+
}
109+
}
110+
93111
// Request to define a flow targeting a dataset.
94112
message DefineFlow {
95113
// The graph to attach this flow to.
@@ -101,14 +119,18 @@ message PipelineCommand {
101119
// Name of the dataset this flow writes to. Can be partially or fully qualified.
102120
optional string target_dataset_name = 3;
103121

104-
// An unresolved relation that defines the dataset's flow.
105-
optional spark.connect.Relation relation = 4;
122+
// The result of executing the flow's query function.
123+
optional QueryFunctionResult result = 4;
106124

107125
// SQL configurations set when running this flow.
108126
map<string, string> sql_conf = 5;
109127

110128
// If true, this flow will only be run once per full refresh.
111129
optional bool once = 6;
130+
131+
// Identifier for the client making the request. The server uses this to determine which flow
132+
// evaluation request stream to dispatch evaluation requests to for this flow.
133+
optional string client_id = 7;
112134
}
113135

114136
// Resolves all datasets and flows and start a pipeline update. Should be called after all
@@ -129,8 +151,29 @@ message PipelineCommand {
129151
// The contents of the SQL file.
130152
optional string sql_text = 3;
131153
}
132-
}
133154

155+
// Request to get the stream of query function execution signals for a graph.
156+
message GetQueryFunctionExecutionSignalStream {
157+
// The graph to get the query function execution signal stream for.
158+
optional string dataflow_graph_id = 1;
159+
160+
// Identifier for the client that is requesting the stream.
161+
optional string client_id = 2;
162+
}
163+
164+
// Request from the client to update the flow function evaluation result
165+
// for a previously unanalyzed flow.
166+
message DefineFlowQueryFunctionResult {
167+
// The fully qualified name of the flow being updated.
168+
optional string flow_name = 1;
169+
170+
// The ID of the graph this flow belongs to.
171+
optional string dataflow_graph_id = 2;
172+
173+
// The result of executing the flow's query function.
174+
optional QueryFunctionResult result = 3;
175+
}
176+
}
134177

135178
// Dispatch object for pipelines command results.
136179
message PipelineCommandResult {
@@ -166,3 +209,9 @@ message PipelineEvent {
166209
// The message that should be displayed to users.
167210
optional string message = 2;
168211
}
212+
213+
// A signal from the server to the client to execute the query function for a flow, and to register
214+
// its result with the server.
215+
message PipelineQueryFunctionExecutionSignal {
216+
optional string flow_name = 1;
217+
}

0 commit comments

Comments
 (0)