Skip to content

Commit 9117c91

Browse files
authored
Data Explorer: Request profiles one at a time for larger tables (#9096)
These are some changes to make profile rendering a little smoother. Before, we were batching 4 profiles at a time, but this changes to 1-at-a-time for tables over 1 million rows (below that, the in-window profiles are requested in a single big batch). I also noticed that the background task queue in Python was using 5 x the number of CPUs as its default worker count, and for compute-heavy profile computation this can result in inefficient context switching when computing many profiles. Now the number of workers is equal to the number of logical CPU cores.
1 parent dbb9d7e commit 9117c91

File tree

2 files changed

+65
-19
lines changed

2 files changed

+65
-19
lines changed

extensions/positron-python/python_files/posit/positron/utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import inspect
1010
import json
1111
import logging
12+
import os
1213
import re
1314
import sys
1415
import threading
@@ -149,7 +150,9 @@ class BackgroundJobQueue:
149150

150151
def __init__(self, max_workers=None):
151152
# Initialize the ThreadPoolExecutor with the specified number
152-
# of workers
153+
# of workers. Default to the number of CPU cores for optimal performance.
154+
if max_workers is None:
155+
max_workers = os.cpu_count() or 4
153156
self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)
154157
self.pending_futures = set()
155158
self.lock = threading.Lock()

src/vs/workbench/services/positronDataExplorer/common/tableSummaryCache.ts

Lines changed: 61 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ const SMALL_HISTOGRAM_NUM_BINS = 80;
2121
const LARGE_HISTOGRAM_NUM_BINS = 200;
2222
const SMALL_FREQUENCY_TABLE_LIMIT = 8;
2323
const LARGE_FREQUENCY_TABLE_LIMIT = 16;
24+
const UPDATE_EVENT_DEBOUNCE_DELAY = 50;
2425

2526
/**
2627
* UpdateDescriptor interface.
@@ -54,6 +55,11 @@ export class TableSummaryCache extends Disposable {
5455
*/
5556
private _trimCacheTimeout?: Timeout;
5657

58+
/**
59+
* Gets or sets the debounced update event timeout.
60+
*/
61+
private _debouncedUpdateTimeout?: Timeout;
62+
5763
/**
5864
* The search text used to filter the dataset in the column schema
5965
* and column profile caches. The last search text value is maintained
@@ -146,6 +152,9 @@ export class TableSummaryCache extends Disposable {
146152
// Clear the trim cache timeout.
147153
this.clearTrimCacheTimeout();
148154

155+
// Clear the debounced update timeout.
156+
this.clearDebouncedUpdateTimeout();
157+
149158
// Call the base class's dispose method.
150159
super.dispose();
151160
}
@@ -676,27 +685,36 @@ export class TableSummaryCache extends Disposable {
676685

677686
const tableState = await this._dataExplorerClientInstance.getBackendState();
678687

679-
// For more than 10 million rows, we request profiles one by one rather than as a batch for
688+
// For more than 1 million rows, we request profiles one by one rather than as a batch for
680689
// better responsiveness
681-
const BATCHING_THRESHOLD = 5_000_000;
690+
const BATCHING_THRESHOLD = 1_000_000;
682691
if (tableState.table_shape.num_rows > BATCHING_THRESHOLD) {
683-
const BATCH_SIZE = 4;
684-
for (let i = 0; i < columnIndices.length; i += BATCH_SIZE) {
685-
// Get the next batch of up to 4 requests
686-
const batchColumnRequests = columnRequests.slice(i, i + BATCH_SIZE);
687-
const batchColumnIndices = columnIndices.slice(i, i + BATCH_SIZE);
688-
689-
// Send the batch of requests to getColumnProfiles
690-
const results = await this._dataExplorerClientInstance.getColumnProfiles(batchColumnRequests);
691-
692-
// Cache the returned column profiles for each index in the batch
693-
for (let j = 0; j < results.length; j++) {
694-
this._columnProfileCache.set(batchColumnIndices[j], results[j]);
695-
}
692+
// Start all requests and store promises
693+
const profilePromises = columnRequests.map((columnRequest, index) => {
694+
const columnIndex = columnIndices[index];
695+
696+
// Start the request and handle result immediately when it completes
697+
const promise = this._dataExplorerClientInstance.getColumnProfiles([columnRequest])
698+
.then(results => {
699+
// Cache the result as soon as it's available
700+
if (results.length > 0) {
701+
this._columnProfileCache.set(columnIndex, results[0]);
702+
}
703+
// Fire the onDidUpdate event with debouncing for smoother updates
704+
this.fireOnDidUpdateDebounced();
705+
return results;
706+
})
707+
.catch(error => {
708+
// Handle errors gracefully
709+
console.error(`Failed to get column profile for index ${columnIndex}:`, error);
710+
throw error;
711+
});
712+
713+
return promise;
714+
});
696715

697-
// Fire the onDidUpdate event so things update as soon as they are returned
698-
this._onDidUpdateEmitter.fire();
699-
}
716+
// Wait for all requests to complete
717+
await Promise.allSettled(profilePromises);
700718
} else {
701719
// Load the column profiles as a batch
702720
const columnProfileResults = await this._dataExplorerClientInstance.getColumnProfiles(
@@ -764,6 +782,31 @@ export class TableSummaryCache extends Disposable {
764782
}
765783
}
766784

785+
/**
786+
* Clears the debounced update timeout.
787+
*/
788+
private clearDebouncedUpdateTimeout() {
789+
// If there is a debounced update timeout scheduled, clear it.
790+
if (this._debouncedUpdateTimeout) {
791+
clearTimeout(this._debouncedUpdateTimeout);
792+
this._debouncedUpdateTimeout = undefined;
793+
}
794+
}
795+
796+
/**
797+
* Fires the onDidUpdate event with debouncing to smooth incremental updates.
798+
*/
799+
private fireOnDidUpdateDebounced() {
800+
// Clear any existing debounced update timeout.
801+
this.clearDebouncedUpdateTimeout();
802+
803+
// Set a new debounced update timeout.
804+
this._debouncedUpdateTimeout = setTimeout(() => {
805+
this._debouncedUpdateTimeout = undefined;
806+
this._onDidUpdateEmitter.fire();
807+
}, UPDATE_EVENT_DEBOUNCE_DELAY);
808+
}
809+
767810
/**
768811
* Trims the data in the cache if the key is not in the provided list.
769812
* @param columnIndicesToKeep The array of column indices to keep in the cache.

0 commit comments

Comments
 (0)