Skip to content

Commit be73073

Browse files
committed
feat(native): Separate exchange get-data-size vs get-data counters for better tracking
Exchange has two phase protocol 1. First request is a get-data-size request, it uses long-poll protocol and wait until exchange source has data or timeout 2. Second request is get-data request, it will be be non-blocking call to transmit data Differentiating those two would be helpful to differentiate the time waiting on server and time for transmitting data
1 parent 1a4b462 commit be73073

File tree

4 files changed

+60
-15
lines changed

4 files changed

+60
-15
lines changed

presto-native-execution/presto_cpp/main/PrestoExchangeSource.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ void PrestoExchangeSource::handleDataResponse(
246246
} else if (response->hasError()) {
247247
processDataError(httpRequestPath, maxBytes, maxWait, response->error());
248248
} else {
249-
processDataResponse(std::move(response));
249+
const bool isGetDataSizeRequest = (maxBytes == 0);
250+
processDataResponse(std::move(response), isGetDataSizeRequest);
250251
}
251252
} catch (const std::exception& e) {
252253
processDataError(httpRequestPath, maxBytes, maxWait, e.what());
@@ -255,11 +256,20 @@ void PrestoExchangeSource::handleDataResponse(
255256
}
256257

257258
void PrestoExchangeSource::processDataResponse(
258-
std::unique_ptr<http::HttpResponse> response) {
259-
RECORD_HISTOGRAM_METRIC_VALUE(
260-
kCounterExchangeRequestDuration, dataRequestRetryState_.durationMs());
261-
RECORD_HISTOGRAM_METRIC_VALUE(
262-
kCounterExchangeRequestNumTries, dataRequestRetryState_.numTries());
259+
std::unique_ptr<http::HttpResponse> response, bool isGetDataSizeRequest) {
260+
if (isGetDataSizeRequest) {
261+
RECORD_HISTOGRAM_METRIC_VALUE(
262+
kCounterExchangeGetDataSizeDuration,
263+
dataRequestRetryState_.durationMs());
264+
RECORD_HISTOGRAM_METRIC_VALUE(
265+
kCounterExchangeGetDataSizeNumTries,
266+
dataRequestRetryState_.numTries());
267+
} else {
268+
RECORD_HISTOGRAM_METRIC_VALUE(
269+
kCounterExchangeRequestDuration, dataRequestRetryState_.durationMs());
270+
RECORD_HISTOGRAM_METRIC_VALUE(
271+
kCounterExchangeRequestNumTries, dataRequestRetryState_.numTries());
272+
}
263273
if (closed_.load()) {
264274
// If PrestoExchangeSource is already closed, just free all buffers
265275
// allocated without doing any processing. This can happen when a super slow
@@ -332,6 +342,12 @@ void PrestoExchangeSource::processDataResponse(
332342
}
333343
PrestoExchangeSource::updateMemoryUsage(totalBytes);
334344

345+
// Record page size counter when not a get-data-size request
346+
if (!isGetDataSizeRequest) {
347+
RECORD_HISTOGRAM_METRIC_VALUE(
348+
kCounterExchangeRequestPageSize, totalBytes);
349+
}
350+
335351
if (enableBufferCopy_) {
336352
page = std::make_unique<exec::SerializedPage>(
337353
std::move(singleChain), [pool = pool_](folly::IOBuf& iobuf) {

presto-native-execution/presto_cpp/main/PrestoExchangeSource.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ class PrestoExchangeSource : public velox::exec::ExchangeSource {
200200
// response without an end marker. Sends delete-results if received an end
201201
// marker. The sequence of operations is: add data or end marker to the
202202
// queue; complete the future, send ack or delete-results.
203-
void processDataResponse(std::unique_ptr<http::HttpResponse> response);
203+
void processDataResponse(std::unique_ptr<http::HttpResponse> response, bool getDataSize);
204204

205205
// If 'retry' is true, then retry the http request failure until reaches the
206206
// retry limit, otherwise just set exchange source error without retry. As

presto-native-execution/presto_cpp/main/common/Counters.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,28 +117,49 @@ void registerPrestoMetrics() {
117117
99,
118118
100);
119119

120-
// Tracks exchange request duration in range of [0, 300s] with
121-
// 300 buckets and reports P50, P90, P99, and P100.
120+
// Tracks exchange request duration in range of [0, 10s] with
121+
// 500 buckets and reports P50, P90, P99, and P100.
122122
DEFINE_HISTOGRAM_METRIC(
123123
kCounterExchangeRequestDuration,
124-
1'000,
124+
20, // 20ms bucket size
125125
0,
126-
300'000,
126+
10'000, // 10s max
127127
50,
128128
90,
129129
99,
130130
100);
131-
// Tracks exchange request num of retris in range of [0, 20] with
131+
// Tracks exchange request num of tries in range of [0, 20] with
132132
// 20 buckets and reports P50, P90, P99, and P100.
133133
DEFINE_HISTOGRAM_METRIC(
134-
kCounterExchangeRequestNumTries,
135-
1,
134+
kCounterExchangeRequestNumTries, 1, 0, 20, 50, 90, 99, 100);
135+
// Tracks exchange request page size in range of [0, 20MB] with
136+
// 20K buckets and reports P50, P90, P99, and P100.
137+
DEFINE_HISTOGRAM_METRIC(
138+
kCounterExchangeRequestPageSize,
139+
10 * 1024, // 10KB bucket size
136140
0,
137-
20,
141+
20 * 1024 * 1024, // 20MB max
138142
50,
139143
90,
140144
99,
141145
100);
146+
147+
// Tracks exchange get-data-size request duration in range of [0, 300s] with
148+
// 300 buckets and reports P50, P90, P99, and P100.
149+
DEFINE_HISTOGRAM_METRIC(
150+
kCounterExchangeGetDataSizeDuration,
151+
1'000,
152+
0,
153+
300'000,
154+
50,
155+
90,
156+
99,
157+
100);
158+
// Tracks exchange get-data-size request num of tries in range of [0, 20] with
159+
// 20 buckets and reports P50, P90, P99, and P100.
160+
DEFINE_HISTOGRAM_METRIC(
161+
kCounterExchangeGetDataSizeNumTries, 1, 0, 20, 50, 90, 99, 100);
162+
142163
DEFINE_METRIC(kCounterMemoryPushbackCount, facebook::velox::StatType::COUNT);
143164
DEFINE_HISTOGRAM_METRIC(
144165
kCounterMemoryPushbackLatencyMs, 10'000, 0, 100'000, 50, 90, 99, 100);

presto-native-execution/presto_cpp/main/common/Counters.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ constexpr folly::StringPiece kCounterExchangeRequestDuration{
5151
"presto_cpp.exchange.request.duration"};
5252
constexpr folly::StringPiece kCounterExchangeRequestNumTries{
5353
"presto_cpp.exchange.request.num_tries"};
54+
constexpr folly::StringPiece kCounterExchangeRequestPageSize{
55+
"presto_cpp.exchange.request.page_size"};
56+
57+
58+
constexpr folly::StringPiece kCounterExchangeGetDataSizeDuration{
59+
"presto_cpp.exchange.get_data_size.duration"};
60+
constexpr folly::StringPiece kCounterExchangeGetDataSizeNumTries{
61+
"presto_cpp.exchange.get_data_size.num_tries"};
5462

5563
constexpr folly::StringPiece kCounterNumQueryContexts{
5664
"presto_cpp.num_query_contexts"};

0 commit comments

Comments
 (0)