Skip to content

Commit 26faa9b

Browse files
committed
Added model fields for chunk/result latency
Signed-off-by: Nikhil Suri <[email protected]>
1 parent b9830a2 commit 26faa9b

File tree

3 files changed

+103
-3
lines changed

3 files changed

+103
-3
lines changed

src/databricks/sql/common/unified_http_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def __init__(self, client_context):
5050
"""
5151
self.config = client_context
5252
# Since the unified http client is used for all requests, we need to have proxy and direct pool managers
53-
# for per-request proxy decisions.
53+
# for per-reques ̰ˇt proxy decisions.
5454
self._direct_pool_manager = None
5555
self._proxy_pool_manager = None
5656
self._retry_policy = None

src/databricks/sql/telemetry/models/event.py

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,100 @@ class DriverErrorInfo(JsonSerializableMixin):
149149
stack_trace: str
150150

151151

152+
@dataclass
153+
class ChunkDetails(JsonSerializableMixin):
154+
"""
155+
Contains detailed metrics about chunk downloads during result fetching.
156+
157+
These metrics are accumulated across all chunk downloads for a single statement.
158+
In Java, this is populated by the StatementTelemetryDetails tracker as chunks are downloaded.
159+
160+
Tracking approach:
161+
- Initialize total_chunks_present from result manifest
162+
- For each chunk downloaded:
163+
* Increment total_chunks_iterated
164+
* Add chunk latency to sum_chunks_download_time_millis
165+
* Update initial_chunk_latency_millis (first chunk only)
166+
* Update slowest_chunk_latency_millis (if current chunk is slower)
167+
168+
Attributes:
169+
initial_chunk_latency_millis (int): Latency of the first chunk download
170+
slowest_chunk_latency_millis (int): Latency of the slowest chunk download
171+
total_chunks_present (int): Total number of chunks available
172+
total_chunks_iterated (int): Number of chunks actually downloaded
173+
sum_chunks_download_time_millis (int): Total time spent downloading all chunks
174+
"""
175+
176+
initial_chunk_latency_millis: Optional[int] = None
177+
slowest_chunk_latency_millis: Optional[int] = None
178+
total_chunks_present: Optional[int] = None
179+
total_chunks_iterated: Optional[int] = None
180+
sum_chunks_download_time_millis: Optional[int] = None
181+
182+
183+
@dataclass
184+
class ResultLatency(JsonSerializableMixin):
185+
"""
186+
Contains latency metrics for different phases of query execution.
187+
188+
This tracks two distinct phases:
189+
1. result_set_ready_latency_millis: Time from query submission until results are available (execute phase)
190+
- Set when execute() completes
191+
2. result_set_consumption_latency_millis: Time spent iterating/fetching results (fetch phase)
192+
- Measured from first fetch call until no more rows available
193+
- In Java: tracked via markResultSetConsumption(hasNext) method
194+
- Records start time on first fetch, calculates total on last fetch
195+
196+
Attributes:
197+
result_set_ready_latency_millis (int): Time until query results are ready (execution phase)
198+
result_set_consumption_latency_millis (int): Time spent fetching/consuming results (fetch phase)
199+
200+
Note:
201+
Java implementation includes private field 'startTimeOfResultSetIterationNano' for internal
202+
tracking (not serialized to JSON). When implementing tracking in Python, use similar approach:
203+
- Record start time on first fetchone/fetchmany/fetchall call
204+
- Calculate total consumption latency when iteration completes or cursor closes
205+
"""
206+
207+
result_set_ready_latency_millis: Optional[int] = None
208+
result_set_consumption_latency_millis: Optional[int] = None
209+
210+
211+
@dataclass
212+
class OperationDetail(JsonSerializableMixin):
213+
"""
214+
Contains detailed information about the operation being performed.
215+
216+
This provides more granular operation tracking than statement_type, allowing
217+
differentiation between similar operations (e.g., EXECUTE_STATEMENT vs EXECUTE_STATEMENT_ASYNC).
218+
219+
Tracking approach:
220+
- operation_type: Map method name to operation type enum
221+
* Java maps: executeStatement -> EXECUTE_STATEMENT
222+
* Java maps: listTables -> LIST_TABLES
223+
* Python could use similar mapping from method names
224+
225+
- is_internal_call: Track if operation is initiated by driver internally
226+
* Set to true for driver-initiated metadata calls
227+
* Set to false for user-initiated operations
228+
229+
- Status polling: For async operations
230+
* Increment n_operation_status_calls for each status check
231+
* Accumulate operation_status_latency_millis across all status calls
232+
233+
Attributes:
234+
n_operation_status_calls (int): Number of status polling calls made
235+
operation_status_latency_millis (int): Total latency of all status calls
236+
operation_type (str): Specific operation type (e.g., EXECUTE_STATEMENT, LIST_TABLES, CANCEL_STATEMENT)
237+
is_internal_call (bool): Whether this is an internal driver operation
238+
"""
239+
240+
n_operation_status_calls: Optional[int] = None
241+
operation_status_latency_millis: Optional[int] = None
242+
operation_type: Optional[str] = None
243+
is_internal_call: Optional[bool] = None
244+
245+
152246
@dataclass
153247
class SqlExecutionEvent(JsonSerializableMixin):
154248
"""
@@ -160,14 +254,20 @@ class SqlExecutionEvent(JsonSerializableMixin):
160254
is_compressed (bool): Whether the result is compressed
161255
execution_result (ExecutionResultFormat): Format of the execution result
162256
retry_count (int): Number of retry attempts made
163-
chunk_id (int): ID of the chunk if applicable
257+
chunk_id (int): ID of the chunk if applicable (used for error tracking)
258+
chunk_details (ChunkDetails): Aggregated chunk download metrics
259+
result_latency (ResultLatency): Latency breakdown by execution phase
260+
operation_detail (OperationDetail): Detailed operation information
164261
"""
165262

166263
statement_type: StatementType
167264
is_compressed: bool
168265
execution_result: ExecutionResultFormat
169266
retry_count: Optional[int]
170267
chunk_id: Optional[int]
268+
chunk_details: Optional[ChunkDetails] = None
269+
result_latency: Optional[ResultLatency] = None
270+
operation_detail: Optional[OperationDetail] = None
171271

172272

173273
@dataclass

src/databricks/sql/telemetry/telemetry_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ class TelemetryClientFactory:
380380
# Shared flush thread for all clients
381381
_flush_thread = None
382382
_flush_event = threading.Event()
383-
_flush_interval_seconds = 90
383+
_flush_interval_seconds = 300 # 5 minutes
384384

385385
DEFAULT_BATCH_SIZE = 100
386386

0 commit comments

Comments
 (0)