Skip to content

Commit 2dba03b

Browse files
authored
Merge pull request #33 from DenisaCG/maxNoFiles
Add configurable limit for number of objects listed in `DriveBrowser`
2 parents 1c4e61e + 7937a60 commit 2dba03b

File tree

9 files changed

+124
-38
lines changed

9 files changed

+124
-38
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ c = get_config()
3838

3939
c.DrivesConfig.access_key_id = "<Drives Access Key ID / IAM Access Key ID>"
4040
c.DrivesConfig.secret_access_key = "<Drives Secret Access Key / IAM Secret>"
41-
c.DrivesConfig.session_token = "<Drives Session Token / IAM Session Token>"
41+
c.DrivesConfig.session_token = "<Drives Session Token / IAM Session Token (optional)>"
42+
c.DrivesConfig.provider = "<Drives provider e.g.: s3, gcs>"
4243
```
4344

4445
### Custom credentials file path

jupyter_drives/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def __init__(self, **kwargs):
8080
super().__init__(**kwargs)
8181
self._load_credentials()
8282

83-
def _load_credentials(self):
83+
def _load_credentials(self):
8484
# check if credentials were already set in jupyter_notebook_config.py
8585
if self.access_key_id is not None and self.secret_access_key is not None:
8686
return
@@ -114,7 +114,7 @@ def _extract_credentials_from_file(self, file_path):
114114
provider, access_key_id, secret_access_key, session_token = None, None, None, None
115115
lines = file.readlines()
116116
for line in lines:
117-
if line.startswith("provider ="):
117+
if line.startswith("drives_provider ="):
118118
provider = line.split("=")[1].strip()
119119
elif line.startswith("drives_access_key_id ="):
120120
access_key_id = line.split("=")[1].strip()

jupyter_drives/handlers.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ def write_error(self, status_code, **kwargs):
4242
reply["error"] = "".join(traceback.format_exception(*exc_info))
4343
self.finish(json.dumps(reply))
4444

45+
class ConfigJupyterDrivesHandler(JupyterDrivesAPIHandler):
    """
    Set certain configuration variables in drives manager.

    Currently the only supported variable is the listing limit, sent as
    ``{"new_limit": <int>}`` in the JSON body of a POST request.
    """
    def initialize(self, logger: logging.Logger, manager: JupyterDrivesManager):
        return super().initialize(logger, manager)

    @tornado.web.authenticated
    async def post(self):
        """Forward the requested listing limit to the drives manager.

        Raises:
            tornado.web.HTTPError: 400 when the body is not a JSON object
                containing a ``new_limit`` entry.
        """
        body = self.get_json_body()
        # Reject a missing or malformed payload up front: unpacking it blindly
        # with ``**body`` would raise TypeError and surface as a 500.
        if not isinstance(body, dict) or "new_limit" not in body:
            raise tornado.web.HTTPError(
                400,
                reason="Request body must be a JSON object with a 'new_limit' entry."
            )
        # Pass only the key the manager understands; unrelated keys are ignored.
        result = self._manager.set_listing_limit(new_limit=body["new_limit"])
        self.finish(result)
57+
4558
class ListJupyterDrivesHandler(JupyterDrivesAPIHandler):
4659
"""
4760
List available drives. Mounts drives.
@@ -106,7 +119,8 @@ async def head(self, drive: str = "", path: str = ""):
106119
self.finish(result)
107120

108121
handlers = [
109-
("drives", ListJupyterDrivesHandler)
122+
("drives", ListJupyterDrivesHandler),
123+
("drives/config", ConfigJupyterDrivesHandler),
110124
]
111125

112126
handlers_with_path = [

jupyter_drives/manager.py

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,16 @@ def __init__(self, config: traitlets.config.Config) -> None:
4040
self._config = DrivesConfig(config=config)
4141
self._client = httpx.AsyncClient()
4242
self._content_managers = {}
43+
self._max_files_listed = 1000
4344

4445
# initiate boto3 session if we are dealing with S3 drives
4546
if self._config.provider == 's3':
4647
self._s3_clients = {}
4748
if self._config.access_key_id and self._config.secret_access_key:
48-
self._s3_session = boto3.Session(aws_access_key_id = self._config.access_key_id, aws_secret_access_key = self._config.secret_access_key)
49+
if self._config.session_token is None:
50+
self._s3_session = boto3.Session(aws_access_key_id = self._config.access_key_id, aws_secret_access_key = self._config.secret_access_key)
51+
else:
52+
self._s3_session = boto3.Session(aws_access_key_id = self._config.access_key_id, aws_secret_access_key = self._config.secret_access_key, aws_session_token = self._config.session_token)
4953
else:
5054
raise tornado.web.HTTPError(
5155
status_code= httpx.codes.BAD_REQUEST,
@@ -71,6 +75,22 @@ def per_page_argument(self) -> Optional[Tuple[str, int]]:
7175
"""
7276
return ("per_page", 100)
7377

78+
def set_listing_limit(self, new_limit):
    """Set new limit for listing.

    Args:
        new_limit: new maximum number of objects to be listed; must be a
            positive integer.

    Raises:
        tornado.web.HTTPError: 400 (bad request) when ``new_limit`` is not a
            positive integer. The previous limit is left untouched.
    """
    # ``get_contents`` uses this value as a chunk size and divides by it, so
    # zero, negative or non-integer values must never be stored.
    # ``bool`` is a subclass of ``int`` and is rejected explicitly.
    is_integer = isinstance(new_limit, int) and not isinstance(new_limit, bool)
    if not is_integer or new_limit < 1:
        raise tornado.web.HTTPError(
            status_code= httpx.codes.BAD_REQUEST,
            reason= f"Invalid listing limit {new_limit!r}: expected a positive integer."
        )

    self._max_files_listed = new_limit
    return
93+
7494
async def list_drives(self):
7595
"""Get list of available drives.
7696
@@ -126,15 +146,25 @@ async def mount_drive(self, drive_name, provider, region):
126146
127147
Args:
128148
drive_name: name of drive to mount
129-
130-
Returns:
131-
The content manager for the drive.
132149
"""
133150
try:
134151
# check if content manager doesn't already exist
135152
if drive_name not in self._content_managers or self._content_managers[drive_name] is None:
136153
if provider == 's3':
137-
store = obs.store.S3Store.from_url("s3://" + drive_name + "/", config = {"aws_access_key_id": self._config.access_key_id, "aws_secret_access_key": self._config.secret_access_key, "aws_region": region})
154+
if self._config.session_token is None:
155+
configuration = {
156+
"aws_access_key_id": self._config.access_key_id,
157+
"aws_secret_access_key": self._config.secret_access_key,
158+
"aws_region": region
159+
}
160+
else:
161+
configuration = {
162+
"aws_access_key_id": self._config.access_key_id,
163+
"aws_secret_access_key": self._config.secret_access_key,
164+
"aws_session_token": self._config.session_token,
165+
"aws_region": region
166+
}
167+
store = obs.store.S3Store.from_url("s3://" + drive_name + "/", config = configuration)
138168
elif provider == 'gcs':
139169
store = obs.store.GCSStore.from_url("gs://" + drive_name + "/", config = {}) # add gcs config
140170
elif provider == 'http':
@@ -193,23 +223,43 @@ async def get_contents(self, drive_name, path):
193223
isDir = False
194224
emptyDir = True # assume we are dealing with an empty directory
195225

226+
chunk_size = 100
227+
if self._max_files_listed < chunk_size:
228+
chunk_size = self._max_files_listed
229+
no_batches = int(self._max_files_listed/chunk_size)
230+
196231
# using Arrow lists as they are recommended for large results
197232
# stream will be an async iterable of RecordBatch
198-
stream = obs.list(self._content_managers[drive_name]["store"], path, chunk_size=100, return_arrow=True)
233+
current_batch = 0
234+
stream = obs.list(self._content_managers[drive_name]["store"], path, chunk_size=chunk_size, return_arrow=True)
199235
async for batch in stream:
236+
current_batch += 1
237+
# reached last batch that can be shown (partially)
238+
if current_batch == no_batches + 1:
239+
remaining_files = self._max_files_listed - no_batches*chunk_size
240+
200241
# if content exists we are dealing with a directory
201242
if isDir is False and batch:
202243
isDir = True
203244
emptyDir = False
204245

205246
contents_list = pyarrow.record_batch(batch).to_pylist()
206247
for object in contents_list:
248+
# when listing the last batch (partially), make sure we don't exceed limit
249+
if current_batch == no_batches + 1:
250+
if remaining_files <= 0:
251+
break
252+
remaining_files -= 1
207253
data.append({
208254
"path": object["path"],
209255
"last_modified": object["last_modified"].isoformat(),
210256
"size": object["size"],
211257
})
212258

259+
# check if we reached the limit of files that can be listed
260+
if current_batch == no_batches + 1:
261+
break
262+
213263
# check if we are dealing with an empty drive
214264
if isDir is False and path != '':
215265
content = b""

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ classifiers = [
2222
"Programming Language :: Python :: 3.11",
2323
]
2424
dependencies = [
25-
"obstore>=0.2.0,<0.3",
25+
"obstore>=0.3.0b,<0.4",
26+
"arro3-core>=0.2.1,<0.3",
2627
"pyarrow>=18.0.0,<19.0.0",
2728
"jupyter_server>=2.14.2,<3",
2829
"s3contents>=0.11.1,<0.12.0",

schema/drives-file-browser.json

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"title": "Jupyter Drives Settings",
2+
"title": "Drives Browser Settings",
33
"description": "jupyter-drives settings.",
44
"jupyter.lab.toolbars": {
55
"DriveBrowser": [
@@ -40,6 +40,12 @@
4040
"type": "object",
4141
"jupyter.lab.transform": true,
4242
"properties": {
43+
"maxFilesListed": {
44+
"type": "integer",
45+
"title": "Maximum number of objects listed",
46+
"description": "Configure the maximum number of objects shown in a listing for any given path.",
47+
"default": 1000
48+
},
4349
"toolbar": {
4450
"title": "Drive browser toolbar items",
4551
"description": "Note: To disable a toolbar item,\ncopy it to User Preferences and add the\n\"disabled\" key.",

src/contents.ts

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ export class Drive implements Contents.IDrive {
4545
this._serverSettings = ServerConnection.makeSettings();
4646
this._name = options.name ?? '';
4747
this._drivesList = options.drivesList ?? [];
48-
//this._apiEndpoint = options.apiEndpoint ?? SERVICE_DRIVE_URL;
4948
}
5049

5150
/**
@@ -719,16 +718,6 @@ export class Drive implements Contents.IDrive {
719718
}
720719
}
721720

722-
/**
723-
* Get a REST url for a file given a path.
724-
*/
725-
/*private _getUrl(...args: string[]): string {
726-
const parts = args.map(path => URLExt.encodeParts(path));
727-
const baseUrl = this.serverSettings.baseUrl;
728-
return URLExt.join(baseUrl, this._apiEndpoint, ...parts);
729-
}*/
730-
731-
// private _apiEndpoint: string;
732721
private _drivesList: IDriveInfo[] = [];
733722
private _serverSettings: ServerConnection.ISettings;
734723
private _name: string = '';
@@ -771,17 +760,3 @@ export namespace Drive {
771760
apiEndpoint?: string;
772761
}
773762
}
774-
775-
/*namespace Private {
776-
/**
777-
* Normalize a file extension to be of the type `'.foo'`.
778-
*
779-
* Adds a leading dot if not present and converts to lower case.
780-
*/
781-
/*export function normalizeExtension(extension: string): string {
782-
if (extension.length > 0 && extension.indexOf('.') !== 0) {
783-
extension = `.${extension}`;
784-
}
785-
return extension;
786-
}
787-
}*/

src/index.ts

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import { CommandRegistry } from '@lumino/commands';
2626
import { DriveListModel, DriveListView, IDrive } from './drivelistmanager';
2727
import { DriveIcon, driveBrowserIcon } from './icons';
2828
import { Drive } from './contents';
29-
import { getDrivesList } from './requests';
29+
import { getDrivesList, setListingLimit } from './requests';
3030
import { IDriveInfo, IDrivesList } from './token';
3131

3232
/**
@@ -288,6 +288,34 @@ const driveFileBrowser: JupyterFrontEndPlugin<void> = {
288288
translator
289289
)
290290
);
291+
292+
/**
 * Load the settings for this extension and push the listing limit to the server.
 *
 * @param setting Extension settings
 */
function loadSetting(setting: ISettingRegistry.ISettings): void {
  // Read the setting and convert it to the correct type.
  const maxFilesListed = setting.get('maxFilesListed').composite as number;

  // This function is also used as a signal slot, so it must stay synchronous.
  // Handle the request's outcome here instead of leaving the promise floating,
  // otherwise a failed request surfaces as an unhandled rejection.
  setListingLimit(maxFilesListed).catch(reason => {
    console.error(
      `Something went wrong when setting the listing limit.\n${reason}`
    );
  });
}
303+
304+
// Wait for the application to be restored and
305+
// for the settings for this plugin to be loaded
306+
Promise.all([app.restored, settingsRegistry.load(driveFileBrowser.id)])
307+
.then(([, setting]) => {
308+
// Read the settings
309+
loadSetting(setting);
310+
311+
// Listen for your plugin setting changes using Signal
312+
setting.changed.connect(loadSetting);
313+
})
314+
.catch(reason => {
315+
console.error(
316+
`Something went wrong when reading the settings.\n${reason}`
317+
);
318+
});
291319
}
292320
};
293321

src/requests.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@ let data: Contents.IModel = {
2121
type: ''
2222
};
2323

24+
/**
25+
* Set new limit for number of objects to be listed inside the DriveBrowser, given any path.
26+
*
27+
* @returns
28+
*/
29+
export async function setListingLimit(newLimit: number) {
30+
await requestAPI<any>('drives/config', 'POST', {
31+
new_limit: newLimit
32+
});
33+
}
34+
2435
/**
2536
* Fetch the list of available drives.
2637
* @returns The list of available drives.

0 commit comments

Comments
 (0)