Skip to content

Commit d0b1729

Browse files
Prevent returning cached entry if the entry is degenerate (#1873)
Co-authored-by: Martin Durant <[email protected]>
1 parent 81a5fd5 commit d0b1729

9 files changed

+2050
-1077
lines changed

fsspec/implementations/dbfs.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
from __future__ import annotations
2+
13
import base64
24
import urllib
35

46
import requests
5-
import requests.exceptions
67
from requests.adapters import HTTPAdapter, Retry
8+
from typing_extensions import override
79

810
from fsspec import AbstractFileSystem
911
from fsspec.spec import AbstractBufferedFile
@@ -57,6 +59,24 @@ def __init__(self, instance, token, **kwargs):
5759

5860
super().__init__(**kwargs)
5961

62+
@override
63+
def _ls_from_cache(self, path) -> list[dict[str, str | int]] | None:
64+
"""Check cache for listing
65+
66+
Returns listing, if found (may be empty list for a directory that
67+
exists but contains nothing), None if not in cache.
68+
"""
69+
self.dircache.pop(path.rstrip("/"), None)
70+
71+
parent = self._parent(path)
72+
if parent in self.dircache:
73+
for entry in self.dircache[parent]:
74+
if entry["name"] == path.rstrip("/"):
75+
if entry["type"] != "directory":
76+
return [entry]
77+
return []
78+
raise FileNotFoundError(path)
79+
6080
def ls(self, path, detail=True, **kwargs):
6181
"""
6282
List the contents of the given path.
@@ -70,7 +90,15 @@ def ls(self, path, detail=True, **kwargs):
7090
but also additional information on file sizes
7191
and types.
7292
"""
73-
out = self._ls_from_cache(path)
93+
try:
94+
out = self._ls_from_cache(path)
95+
except FileNotFoundError:
96+
# This happens if the `path`'s parent was cached, but `path` is not
97+
# there. This suggests that `path` is new since the parent was
98+
# cached. Attempt to invalidate parent's cache before continuing.
99+
self.dircache.pop(self._parent(path), None)
100+
out = None
101+
74102
if not out:
75103
try:
76104
r = self._send_to_api(
@@ -460,7 +488,7 @@ def _fetch_range(self, start, end):
460488
return return_buffer
461489

462490
def _to_sized_blocks(self, length, start=0):
463-
"""Helper function to split a range from 0 to total_length into bloksizes"""
491+
"""Helper function to split a range from 0 to total_length into blocksizes"""
464492
end = start + length
465493
for data_chunk in range(start, end, self.blocksize):
466494
data_start = data_chunk

fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_listing.yaml

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ interactions:
1313
Content-Type:
1414
- application/json
1515
User-Agent:
16-
- python-requests/2.32.3
16+
- python-requests/2.32.4
1717
authorization:
1818
- DUMMY
1919
method: POST
@@ -44,7 +44,7 @@ interactions:
4444
x-content-type-options:
4545
- nosniff
4646
x-request-id:
47-
- e71bb75f-b05a-4523-b8bc-ce331812fbf0
47+
- 8b8f96eb-6260-476f-9bee-5579fe95ce97
4848
status:
4949
code: 200
5050
message: OK
@@ -62,17 +62,17 @@ interactions:
6262
Content-Type:
6363
- application/json
6464
User-Agent:
65-
- python-requests/2.32.3
65+
- python-requests/2.32.4
6666
authorization:
6767
- DUMMY
6868
method: GET
6969
uri: https://my_instance.com/api/2.0/dbfs/list
7070
response:
7171
body:
7272
string: !!binary |
73-
H4sIAAAAAAAEAxzMMQ7CMAwF0Lv8OVKYcwAuwIiQ1SaOMKI4ss3SqncvZX3D29DlzY5y3zCmeKIg
74-
X39yCzXO3X1wpTZ3z8P0xTXIVAMJ4tTEUMK+nP4JuayMcklYtEmXOoXoh0KWU/fHfgAAAP//AwCB
75-
uORhbAAAAA==
73+
H4sIAAAAAAAEAxzMOw7CMAwA0Lt4jhQX8VMOwAUYURWVxBFGFEe2Wah6dyrWN7wFGr/IIN0W6JM/
74+
IEG8bHJ1UYrNrFPJ9d4sdpUnFc8q4hCALVdWSK4fCv8kG38JEgaYpXLjMjnLOzvPmw6nw2444x6P
75+
iLiO6w8AAP//AwBhkoZWeAAAAA==
7676
headers:
7777
access-control-allow-headers:
7878
- Authorization, X-Databricks-Azure-Workspace-Resource-Id, X-Databricks-Org-Id,
@@ -100,7 +100,7 @@ interactions:
100100
x-content-type-options:
101101
- nosniff
102102
x-request-id:
103-
- 6e66514c-c883-49c7-b215-2fecc5ef8222
103+
- 4dab76b2-a193-45d7-8000-642f1c003aa6
104104
status:
105105
code: 200
106106
message: OK
@@ -118,7 +118,7 @@ interactions:
118118
Content-Type:
119119
- application/json
120120
User-Agent:
121-
- python-requests/2.32.3
121+
- python-requests/2.32.4
122122
authorization:
123123
- DUMMY
124124
method: GET
@@ -149,7 +149,7 @@ interactions:
149149
x-content-type-options:
150150
- nosniff
151151
x-request-id:
152-
- 59b8c763-be19-4402-8d37-135a7d6c7aed
152+
- 9a476bcf-3900-47ca-b21d-412419e70a11
153153
status:
154154
code: 200
155155
message: OK
@@ -167,19 +167,19 @@ interactions:
167167
Content-Type:
168168
- application/json
169169
User-Agent:
170-
- python-requests/2.32.3
170+
- python-requests/2.32.4
171171
authorization:
172172
- DUMMY
173173
method: GET
174174
uri: https://my_instance.com/api/2.0/dbfs/list
175175
response:
176176
body:
177177
string: !!binary |
178-
H4sIAAAAAAAEAzyOwWrDMBBEf0XoXNtx4hqSU6B1IZcY4hQKpQhFWrkqtlfdVUtDyL9XbqCnZdh5
179-
M3ORQISkDFqQG3louvb58NCox7bp1L49quZl1x3lnRyBWfezZ4/C+QEEkrCewESks4Afz5EFTiLo
180-
+C6Kp+To0gcKxxzAKHtyXATCjwQoQozF6Jn91Kv/kDzVWIjaDyw3rxe5jecwF84n7xH7AXTwnBsc
181-
i5vMKZj8AJ9fwHE3OUwBdFPK20TWTjuoa8jK2q6yamGq7ATrZbYuYelW9aLU91ViGOj7b4mOOlHy
182-
+nb9BQAA//8DADr+FHAYAQAA
178+
H4sIAAAAAAAEAzyOUUvEMBCE/0rIs20VC7nek3BWuJcrtCcIIiFNtjXSduNuFI/j/rupBz4tw843
179+
M2cJREjaogO5lW3dNc/trtaPTd3pQ3PU9cu+O8obOQOzGVfPAcXgJxBIwnkCG5FOAn48Rxa4iGDi
180+
uyiekqNLHygG5gBWu37gIhB+JEATYixmz+yXUf+H5KnGQTR+Yrl9PcuHeApr4XryEXGcwATPucW5
181+
uMqcgs1b+PwCjvtlwBRAV6W9S2R/V92qsiwzZXqTlRu1yYy5d1mvKuhLU1m1UYlhoO+/JSaaRMnL
182+
2+UXAAD//wMA9nt9bhgBAAA=
183183
headers:
184184
access-control-allow-headers:
185185
- Authorization, X-Databricks-Azure-Workspace-Resource-Id, X-Databricks-Org-Id,
@@ -207,7 +207,7 @@ interactions:
207207
x-content-type-options:
208208
- nosniff
209209
x-request-id:
210-
- 6fafe66e-16d3-40c4-be92-91e2f3601a54
210+
- b1907444-7aba-4878-aa3d-b79eb4a9c787
211211
status:
212212
code: 404
213213
message: Not Found

0 commit comments

Comments
 (0)