Commit b187010

Ruff format
1 parent ef10981 commit b187010

4 files changed: +92 -76 lines changed
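
All four diffs below are mechanical rewrites by `ruff format`, Ruff's Black-compatible formatter: single quotes become double quotes, PEP 8 spacing is applied around operators and keyword arguments, long literals are split one element per line with a trailing comma, and trailing whitespace and missing end-of-file newlines are cleaned up. A minimal before/after sketch of the kind of rewrite involved (the `make_reader` name is hypothetical, for illustration only):

# Before: single quotes, spaces around keyword "=", unspaced binary operators
names = {'CON', 'PRN', 'AUX'}
reader = make_reader(trace = uri, cache_size=1024*1024)

# After ruff format: double quotes, tight keyword "=", spaced binary operators
names = {"CON", "PRN", "AUX"}
reader = make_reader(trace=uri, cache_size=1024 * 1024)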

examples/basic_usage.py

Lines changed: 6 additions & 10 deletions
@@ -3,14 +3,14 @@
 # Step 1: Open a trace hosted on S3 (find more via https://github.com/cacheMon/cache_dataset)
 URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
 reader = lcs.TraceReader(
-    trace = URI,
-    trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
-    reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
+    trace=URI,
+    trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE,
+    reader_init_params=lcs.ReaderInitParam(ignore_obj_size=False),
 )
 
 # Step 2: Initialize cache
 cache = lcs.S3FIFO(
-    cache_size=1024*1024,
+    cache_size=1024 * 1024,
     # Cache specific parameters
     small_size_ratio=0.2,
     ghost_size_ratio=0.8,
@@ -22,9 +22,5 @@
 print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
 
 # Step 3.1: Further process the first 1000 requests again
-req_miss_ratio, byte_miss_ratio = cache.process_trace(
-    reader,
-    start_req=0,
-    max_req=1000
-)
-print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
\ No newline at end of file
+req_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000)
+print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")

libcachesim/_s3_cache.py

Lines changed: 65 additions & 45 deletions
@@ -16,17 +16,36 @@
 
 class _DataLoader:
     """Internal S3 data loader with local caching."""
-
+
     DEFAULT_BUCKET = "cache-datasets"
     DEFAULT_CACHE_DIR = Path(os.environ.get("LCS_HUB_CACHE", Path.home() / ".cache/libcachesim/hub"))
 
     # Characters that are problematic on various filesystems
    INVALID_CHARS = set('<>:"|?*\x00')
     # Reserved names on Windows
     RESERVED_NAMES = {
-        'CON', 'PRN', 'AUX', 'NUL',
-        'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
-        'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'
+        "CON",
+        "PRN",
+        "AUX",
+        "NUL",
+        "COM1",
+        "COM2",
+        "COM3",
+        "COM4",
+        "COM5",
+        "COM6",
+        "COM7",
+        "COM8",
+        "COM9",
+        "LPT1",
+        "LPT2",
+        "LPT3",
+        "LPT4",
+        "LPT5",
+        "LPT6",
+        "LPT7",
+        "LPT8",
+        "LPT9",
     }
 
     def __init__(
@@ -42,65 +61,65 @@ def _validate_bucket_name(self, bucket_name: str) -> str:
         """Validate S3 bucket name according to AWS rules."""
         if not bucket_name:
             raise ValueError("Bucket name cannot be empty")
-
+
         if len(bucket_name) < 3 or len(bucket_name) > 63:
             raise ValueError("Bucket name must be between 3 and 63 characters")
-
-        if not re.match(r'^[a-z0-9.-]+$', bucket_name):
+
+        if not re.match(r"^[a-z0-9.-]+$", bucket_name):
             raise ValueError("Bucket name can only contain lowercase letters, numbers, periods, and hyphens")
-
-        if bucket_name.startswith('.') or bucket_name.endswith('.'):
+
+        if bucket_name.startswith(".") or bucket_name.endswith("."):
             raise ValueError("Bucket name cannot start or end with a period")
-
-        if bucket_name.startswith('-') or bucket_name.endswith('-'):
+
+        if bucket_name.startswith("-") or bucket_name.endswith("-"):
             raise ValueError("Bucket name cannot start or end with a hyphen")
-
-        if '..' in bucket_name:
+
+        if ".." in bucket_name:
             raise ValueError("Bucket name cannot contain consecutive periods")
-
+
         return bucket_name
 
     def _validate_and_sanitize_key(self, key: str) -> str:
         """Validate and sanitize S3 key for safe local filesystem usage."""
         if not key:
             raise ValueError("S3 key cannot be empty")
-
+
         if len(key) > 1024:  # S3 limit is 1024 bytes
             raise ValueError("S3 key is too long (max 1024 characters)")
-
+
         # Check for path traversal attempts
-        if '..' in key:
+        if ".." in key:
             raise ValueError("S3 key cannot contain '..' (path traversal not allowed)")
-
-        if key.startswith('/'):
+
+        if key.startswith("/"):
             raise ValueError("S3 key cannot start with '/'")
-
+
         # Split key into parts and validate each part
-        parts = key.split('/')
+        parts = key.split("/")
         sanitized_parts = []
-
+
         for part in parts:
             if not part:  # Empty part (double slash)
                 continue
-
+
             # Check for reserved names (case insensitive)
             if part.upper() in self.RESERVED_NAMES:
                 raise ValueError(f"S3 key contains reserved name: {part}")
-
+
             # Check for invalid characters
             if any(c in self.INVALID_CHARS for c in part):
                 raise ValueError(f"S3 key contains invalid characters in part: {part}")
-
+
             # Check if part is too long for filesystem
             if len(part) > 255:  # Most filesystems have 255 char limit per component
                 raise ValueError(f"S3 key component too long: {part}")
-
+
             sanitized_parts.append(part)
-
+
         if not sanitized_parts:
             raise ValueError("S3 key resulted in empty path after sanitization")
-
-        return '/'.join(sanitized_parts)
+
+        return "/".join(sanitized_parts)
 
     def _ensure_cache_dir(self) -> None:
         (self.cache_dir / self.bucket_name).mkdir(parents=True, exist_ok=True)
@@ -114,10 +133,11 @@ def _get_available_disk_space(self, path: Path) -> int:
         # Fallback for Windows or other systems
         try:
             import shutil
+
             return shutil.disk_usage(path).free
         except Exception:
             logger.warning("Could not determine available disk space")
-            return float('inf')  # Assume unlimited space if we can't check
+            return float("inf")  # Assume unlimited space if we can't check
 
     @property
     def s3_client(self):
@@ -138,20 +158,20 @@ def _cache_path(self, key: str) -> Path:
         """Create cache path that mirrors S3 structure after validation."""
         sanitized_key = self._validate_and_sanitize_key(key)
         cache_path = self.cache_dir / self.bucket_name / sanitized_key
-
+
         # Double-check that the resolved path is still within cache directory
         try:
             cache_path.resolve().relative_to(self.cache_dir.resolve())
         except ValueError:
             raise ValueError(f"S3 key resolves outside cache directory: {key}")
-
+
         return cache_path
 
     def _get_object_size(self, key: str) -> int:
         """Get the size of an S3 object without downloading it."""
         try:
             response = self.s3_client.head_object(Bucket=self.bucket_name, Key=key)
-            return response['ContentLength']
+            return response["ContentLength"]
         except Exception as e:
             logger.warning(f"Could not determine object size for s3://{self.bucket_name}/{key}: {e}")
             return 0
@@ -254,10 +274,10 @@ def list_s3_objects(self, prefix: str = "", delimiter: str = "/") -> dict:
 def set_cache_dir(cache_dir: Union[str, Path]) -> None:
     """
     Set the global cache directory for S3 downloads.
-
+
     Args:
         cache_dir: Path to the cache directory
-
+
     Example:
         >>> import libcachesim as lcs
         >>> lcs.set_cache_dir("/tmp/my_cache")
@@ -269,10 +289,10 @@ def set_cache_dir(cache_dir: Union[str, Path]) -> None:
 def get_cache_dir() -> Path:
     """
     Get the current cache directory.
-
+
     Returns:
         Path to the current cache directory
-
+
     Example:
         >>> import libcachesim as lcs
         >>> print(lcs.get_cache_dir())
@@ -284,10 +304,10 @@ def get_cache_dir() -> Path:
 def clear_cache(s3_path: Optional[str] = None) -> None:
     """
     Clear cached files.
-
+
     Args:
         s3_path: Specific S3 path to clear, or None to clear all cache
-
+
     Example:
         >>> import libcachesim as lcs
         >>> # Clear specific file
@@ -298,7 +318,7 @@ def clear_cache(s3_path: Optional[str] = None) -> None:
     if s3_path and s3_path.startswith("s3://"):
         parsed = urlparse(s3_path)
         bucket = parsed.netloc
-        key = parsed.path.lstrip('/')
+        key = parsed.path.lstrip("/")
         if bucket == _data_loader.bucket_name:
             _data_loader.clear_cache(key)
         else:
@@ -310,10 +330,10 @@ def clear_cache(s3_path: Optional[str] = None) -> None:
 def get_cache_size() -> int:
     """
     Get total size of cached files in bytes.
-
+
     Returns:
         Total cache size in bytes
-
+
     Example:
         >>> import libcachesim as lcs
         >>> size_mb = lcs.get_cache_size() / (1024**2)
@@ -325,10 +345,10 @@ def get_cache_size() -> int:
 def list_cached_files() -> list[str]:
     """
     List all cached files.
-
+
     Returns:
         List of cached file paths
-
+
     Example:
         >>> import libcachesim as lcs
         >>> files = lcs.list_cached_files()
@@ -344,4 +364,4 @@ def get_data_loader(bucket_name: str = None) -> _DataLoader:
     if bucket_name is None or bucket_name == _data_loader.bucket_name:
         return _data_loader
     else:
-        return _DataLoader(bucket_name=bucket_name, cache_dir=_data_loader.cache_dir.parent)
\ No newline at end of file
+        return _DataLoader(bucket_name=bucket_name, cache_dir=_data_loader.cache_dir.parent)
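
The `_validate_and_sanitize_key` hunks above are quoting and blank-line changes only, but together they spell out the sanitization rules enforced on S3 keys before they are used as local paths. A minimal standalone sketch of those rules, with constants copied from the diff (the `sanitize_key` name is illustrative, not the library's API):

INVALID_CHARS = set('<>:"|?*\x00')  # characters problematic on common filesystems
RESERVED_NAMES = {"CON", "PRN", "AUX", "NUL"}  # subset; the class also lists COM1-COM9 and LPT1-LPT9

def sanitize_key(key: str) -> str:
    # Reject traversal attempts and absolute paths up front
    if ".." in key or key.startswith("/"):
        raise ValueError(f"unsafe key: {key}")
    # Drop empty components (from double slashes), then vet each remaining part
    parts = [p for p in key.split("/") if p]
    for part in parts:
        if part.upper() in RESERVED_NAMES:
            raise ValueError(f"reserved name: {part}")
        if any(c in INVALID_CHARS for c in part):
            raise ValueError(f"invalid characters: {part}")
        if len(part) > 255:  # typical per-component filesystem limit
            raise ValueError(f"component too long: {part}")
    if not parts:
        raise ValueError("empty key after sanitization")
    return "/".join(parts)

print(sanitize_key("cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"))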

libcachesim/trace_reader.py

Lines changed: 20 additions & 20 deletions
@@ -46,65 +46,65 @@ def __init__(
     def _validate_s3_uri(self, s3_uri: str) -> tuple[str, str]:
         """
         Validate and parse S3 URI.
-
+
         Args:
             s3_uri: S3 URI like "s3://bucket/key"
-
+
         Returns:
             Tuple of (bucket, key)
-
+
         Raises:
             ValueError: If URI is invalid
         """
         parsed = urlparse(s3_uri)
-
+
         if parsed.scheme != "s3":
             raise ValueError(f"Invalid S3 URI scheme. Expected 's3', got '{parsed.scheme}': {s3_uri}")
-
+
         if not parsed.netloc:
             raise ValueError(f"Missing bucket name in S3 URI: {s3_uri}")
-
+
         bucket = parsed.netloc
-        key = parsed.path.lstrip('/')
-
+        key = parsed.path.lstrip("/")
+
         if not key:
             raise ValueError(f"Missing key (object path) in S3 URI: {s3_uri}")
-
+
         # Check for path traversal in the key part only
-        if '..' in key:
+        if ".." in key:
             raise ValueError(f"S3 key contains path traversal patterns: {key}")
-
+
         # Check for double slashes in the key part (after s3://)
-        if '//' in key:
+        if "//" in key:
             raise ValueError(f"S3 key contains double slashes: {key}")
-
+
         # Check for backslashes (not valid in URLs)
-        if '\\' in s3_uri:
+        if "\\" in s3_uri:
             raise ValueError(f"S3 URI contains backslashes: {s3_uri}")
-
+
         return bucket, key
 
     def _resolve_s3_path(self, s3_path: str) -> str:
         """
         Resolve S3 path to local cached file path.
-
+
         Args:
             s3_path: S3 URI like "s3://bucket/key"
-
+
         Returns:
             Local file path
         """
         try:
             bucket, key = self._validate_s3_uri(s3_path)
         except ValueError as e:
             raise ValueError(f"Invalid S3 URI: {e}")
-
+
         # Get data loader for this bucket
         try:
             loader = get_data_loader(bucket)
         except ValueError as e:
             raise ValueError(f"Invalid bucket name '{bucket}': {e}")
-
+
         logger.info(f"Resolving S3 path: {s3_path}")
         try:
             return loader.get_cached_path(key)
@@ -296,4 +296,4 @@ def __getitem__(self, index: int) -> Request:
         self._reader.reset()
         self._reader.skip_n_req(index)
         req = Request()
-        return self._reader.read_one_req(req)
\ No newline at end of file
+        return self._reader.read_one_req(req)
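
The `_validate_s3_uri` hunk likewise changes only quoting. For reference, a minimal sketch of the underlying bucket/key split with `urllib.parse.urlparse` (simplified: the real method additionally rejects "..", "//", and backslashes; `split_s3_uri` is an illustrative name):

from urllib.parse import urlparse

def split_s3_uri(s3_uri: str) -> tuple[str, str]:
    parsed = urlparse(s3_uri)
    if parsed.scheme != "s3":
        raise ValueError(f"expected 's3' scheme, got '{parsed.scheme}': {s3_uri}")
    bucket = parsed.netloc  # the host part of the URI is the bucket
    key = parsed.path.lstrip("/")  # the path, minus its leading slash, is the key
    if not bucket or not key:
        raise ValueError(f"missing bucket or key: {s3_uri}")
    return bucket, key

print(split_s3_uri("s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"))
# ('cache-datasets', 'cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst')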

tests/test_reader.py

Lines changed: 1 addition & 1 deletion
@@ -330,7 +330,7 @@ def test_invalid_sampling_ratio(self):
 
         finally:
             os.unlink(temp_file)
-
+
     def test_trace_reader_s3(self):
         """Test trace reader with S3"""
         URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
