diff --git a/pfio/v2/fs.py b/pfio/v2/fs.py index 9bc681a7..8f3615d1 100644 --- a/pfio/v2/fs.py +++ b/pfio/v2/fs.py @@ -87,9 +87,16 @@ class FS(abc.ABC): ''' _cwd = '' + _scheme = '' - def __init__(self): + def __init__(self, scheme=None): self.pid = os.getpid() + if scheme: + self._scheme = str(scheme) + + @property + def scheme(self): + return self._scheme @property def cwd(self): @@ -426,32 +433,33 @@ def _zip_check_create_not_supported(): def _from_scheme(scheme, dirname, kwargs, bucket=None): known_scheme = ['file', 'hdfs', 's3'] + actual_scheme = scheme # Custom scheme; using configparser for older Python. Will # update to toml in Python 3.11 once 3.10 is in the end. - if scheme not in known_scheme: + if actual_scheme not in known_scheme: config_dict = config.get_custom_scheme(scheme) if config_dict is not None: - scheme = config_dict.pop('scheme') # Get the real scheme + actual_scheme = config_dict.pop('scheme') # Get the real scheme # Custom scheme expected here - if scheme not in known_scheme: - raise ValueError("Scheme {} is not supported", scheme) + if actual_scheme not in known_scheme: + raise ValueError("Scheme {} is not supported", actual_scheme) for k in config_dict: if k not in kwargs: # Don't overwrite with configuration value kwargs[k] = config_dict[k] - if scheme == 'file': + if actual_scheme == 'file': from .local import Local - fs = Local(dirname, **kwargs) - elif scheme == 'hdfs': + fs = Local(dirname, scheme=scheme, **kwargs) + elif actual_scheme == 'hdfs': from .hdfs import Hdfs - fs = Hdfs(dirname, **kwargs) - elif scheme == 's3': + fs = Hdfs(dirname, scheme=scheme, **kwargs) + elif actual_scheme == 's3': from .s3 import S3 - fs = S3(bucket=bucket, prefix=dirname, **kwargs) + fs = S3(bucket=bucket, prefix=dirname, scheme=scheme, **kwargs) else: - raise RuntimeError("scheme '{}' is not defined".format(scheme)) + raise RuntimeError("scheme '{}' is not defined".format(actual_scheme)) return fs diff --git a/pfio/v2/hdfs.py b/pfio/v2/hdfs.py index a54428d8..1efd7908 100644 --- a/pfio/v2/hdfs.py +++ b/pfio/v2/hdfs.py @@ -219,8 +219,8 @@ class Hdfs(FS): ''' - def __init__(self, cwd=None, create=False, **_): - super().__init__() + def __init__(self, cwd=None, create=False, scheme=None, **_): + super().__init__(scheme=scheme) self._nameservice, self._fs = _create_fs() assert self._fs is not None self.username = self._get_principal_name() diff --git a/pfio/v2/http_cache.py b/pfio/v2/http_cache.py index cc2347c4..7b05f876 100644 --- a/pfio/v2/http_cache.py +++ b/pfio/v2/http_cache.py @@ -50,7 +50,7 @@ def __init__(self, max_cache_size: int = 1024 * 1024 * 1024, bearer_token_path: Optional[str] = None): assert not isinstance(fs, HTTPCachedFS) - super().__init__() + super().__init__(scheme=fs.scheme) self.fs = fs self.max_cache_size = max_cache_size diff --git a/pfio/v2/local.py b/pfio/v2/local.py index 9611d232..3d17c55f 100644 --- a/pfio/v2/local.py +++ b/pfio/v2/local.py @@ -45,8 +45,8 @@ def __init__(self, _stat, filename): class Local(FS): - def __init__(self, cwd=None, create=False, **_): - super().__init__() + def __init__(self, cwd=None, create=False, scheme=None, **_): + super().__init__(scheme=scheme) if cwd is None: self._cwd = '' diff --git a/pfio/v2/pathlib.py b/pfio/v2/pathlib.py index 36aa80e8..50e6432e 100644 --- a/pfio/v2/pathlib.py +++ b/pfio/v2/pathlib.py @@ -100,7 +100,6 @@ class PurePath(PathLike): Args: args: construct paths. fs: target file system. - scheme: specify URL scheme. (for `as_uri` method) Note: It conforms to `pathlib.PurePosixPath` of Python 3.12 specification. @@ -108,32 +107,16 @@ class PurePath(PathLike): this class not inherits any `pathlib` classes because pfio filesystems is not suitable for pathlib abstact and helper classes. - - TODO: - `scheme` should moves to `FS`. """ def __init__( self, *args: Union[str, PathLike], fs: FS, - scheme: Optional[str] = None, ) -> None: - if isinstance(fs, Local): - scheme = scheme or "file" - elif isinstance(fs, S3): - scheme = scheme or "s3" - elif isinstance(fs, Hdfs): - scheme = scheme or "hdfs" - elif isinstance(fs, Zip): - scheme = scheme or "" - else: - raise ValueError(f"unsupported FS: {fs}") - self._fs: FS = fs - self._scheme = scheme self._pure = PurePosixPath(*args) - self._hash = hash(self._pure) + hash(self._fs) + hash(self._scheme) + self._hash = hash(self._pure) + hash(self._fs) + hash(self.scheme) @property def sep(self) -> str: @@ -141,7 +124,7 @@ def sep(self) -> str: @property def scheme(self) -> str: - return self._scheme + return self._fs.scheme def __hash__(self) -> int: return self._hash @@ -339,7 +322,7 @@ def with_segments( self: SelfPurePathType, *args: Union[str, PathLike], ) -> SelfPurePathType: - return type(self)(*args, fs=self._fs, scheme=self.scheme) + return type(self)(*args, fs=self._fs) class Path(PurePath): @@ -349,7 +332,6 @@ class Path(PurePath): Args: args: construct paths. fs: target file system. - scheme: specify URL scheme. (for `as_uri` method) Note: many methods raise `NotImplementedError` @@ -364,9 +346,8 @@ def __init__( self, *args: str, fs: FS, - scheme: Optional[str] = None, ) -> None: - super().__init__(*args, fs=fs, scheme=scheme) + super().__init__(*args, fs=fs) def _as_relative_to_fs(self) -> str: return _removeprefix(self.as_posix(), self.anchor) diff --git a/pfio/v2/s3.py b/pfio/v2/s3.py index 1c1cfb38..c022aa16 100644 --- a/pfio/v2/s3.py +++ b/pfio/v2/s3.py @@ -313,9 +313,10 @@ def __init__(self, bucket, prefix=None, mpu_chunksize=32*1024*1024, buffering=-1, create=False, + scheme=None, _skip_connect=None, # For test purpose **_): - super().__init__() + super().__init__(scheme=scheme) self.bucket = bucket self.create_bucket = create_bucket if prefix is not None: diff --git a/pfio/v2/zip.py b/pfio/v2/zip.py index 638227bb..2077039a 100644 --- a/pfio/v2/zip.py +++ b/pfio/v2/zip.py @@ -53,7 +53,7 @@ class Zip(FS): def __init__(self, backend, file_path, mode='r', create=False, local_cache=False, local_cachedir=None, **kwargs): - super().__init__() + super().__init__(scheme=backend.scheme) self.backend = backend self.file_path = file_path self.mode = mode diff --git a/tests/v2_tests/test_custom_scheme.py b/tests/v2_tests/test_custom_scheme.py index c3efee6d..999e117d 100644 --- a/tests/v2_tests/test_custom_scheme.py +++ b/tests/v2_tests/test_custom_scheme.py @@ -15,9 +15,11 @@ def test_ini(): with pfio.v2.from_url('foobar://pfio/') as fs: assert isinstance(fs, pfio.v2.Local) + assert fs.scheme == "foobar" with pfio.v2.from_url('baz://pfio/', _skip_connect=True) as s3: assert isinstance(s3, pfio.v2.S3) + assert s3.scheme == "baz" assert 'https://s3.example.com' == s3.kwargs['endpoint_url'] assert 'hoge' == s3.kwargs['aws_access_key_id'] @@ -56,9 +58,11 @@ def test_add_custom_scheme(): with pfio.v2.from_url('foobar2://pfio/') as fs: assert isinstance(fs, pfio.v2.Local) + assert fs.scheme == "foobar2" with pfio.v2.from_url('baz2://pfio/', _skip_connect=True) as s3: assert isinstance(s3, pfio.v2.S3) + assert s3.scheme == "baz2" assert 'https://s3.example.com' == s3.kwargs['endpoint_url'] assert 'hoge' == s3.kwargs['aws_access_key_id'] diff --git a/tests/v2_tests/test_hdfs.py b/tests/v2_tests/test_hdfs.py index 523cc5a3..925260d5 100644 --- a/tests/v2_tests/test_hdfs.py +++ b/tests/v2_tests/test_hdfs.py @@ -55,6 +55,10 @@ def test_repr_str(self): repr(fs) str(fs) + def test_scheme(self): + with Hdfs(self.dirname, scheme="hdfs") as fs: + assert fs.scheme == "hdfs" + def test_read_non_exist(self): non_exist_file = "non_exist_file.txt" diff --git a/tests/v2_tests/test_http_cache.py b/tests/v2_tests/test_http_cache.py index 1adf43a5..382eb96d 100644 --- a/tests/v2_tests/test_http_cache.py +++ b/tests/v2_tests/test_http_cache.py @@ -71,6 +71,8 @@ def test_httpcache_simple(target): with gen_fs(target) as underlay: fs = HTTPCachedFS(http_cache, underlay) + + assert fs.scheme == underlay.scheme with fs.open(filename, mode="wb") as fp: fp.write(content) with fs.open(filename, mode="rb") as fp: @@ -94,6 +96,8 @@ def test_httpcache_too_large(): with gen_fs("local") as underlay: fs = HTTPCachedFS(http_cache, underlay) + assert fs.scheme == underlay.scheme + with fs.open(filename, mode="wb") as fp: for _ in range(1024 + 1): # 1 MB exceeds fp.write(one_mb_array) @@ -135,6 +139,8 @@ def test_httpcache_zipfile_flat(target): with gen_fs(target) as underlay: with underlay.open_zip(zipfilename, mode="w") as zipfs: fs = HTTPCachedFS(http_cache, zipfs) + assert fs.scheme == underlay.scheme + with fs.open(filename1, mode="wb") as fp: fp.write(filecontent1) with fs.open(filename2, mode="wb") as fp: @@ -144,6 +150,8 @@ def test_httpcache_zipfile_flat(target): with underlay.open_zip(zipfilename, mode="r") as zipfs: fs = HTTPCachedFS(http_cache, zipfs) + assert fs.scheme == underlay.scheme + with fs.open(filename1, mode="rb") as fp: assert fp.read(-1) == filecontent1 with fs.open(filename2, mode="rb") as fp: @@ -178,6 +186,8 @@ def test_httpcache_zipfile_archived(target): cached_fs = HTTPCachedFS(http_cache, underlay) with cached_fs.open_zip(zipfilename, mode="w") as fs: + assert cached_fs.scheme == underlay.scheme + with fs.open(filename1, mode="wb") as fp: fp.write(filecontent1) with fs.open(filename2, mode="wb") as fp: @@ -186,6 +196,8 @@ def test_httpcache_zipfile_archived(target): assert len(cache_content) == 0 with cached_fs.open_zip(zipfilename, mode="r") as fs: + assert cached_fs.scheme == underlay.scheme + with fs.open(filename1, mode="rb") as fp: assert fp.read(-1) == filecontent1 with fs.open(filename2, mode="rb") as fp: diff --git a/tests/v2_tests/test_local.py b/tests/v2_tests/test_local.py index be83663b..23f63b6a 100644 --- a/tests/v2_tests/test_local.py +++ b/tests/v2_tests/test_local.py @@ -26,6 +26,13 @@ def test_repr_str(self): str(fs) repr(fs) + def test_scheme(self): + with Local(self.testdir.name) as fs: + assert fs.scheme == "" + + with Local(self.testdir.name, scheme="file") as fs: + assert fs.scheme == "file" + def test_read_string(self): with Local() as fs: diff --git a/tests/v2_tests/test_pathlib.py b/tests/v2_tests/test_pathlib.py index cce9ff7b..a86232a9 100644 --- a/tests/v2_tests/test_pathlib.py +++ b/tests/v2_tests/test_pathlib.py @@ -648,6 +648,17 @@ def test_path_iterdir(storage: str, path: str) -> None: assert sorted(actual_entries) == sorted(filtered) +def test_path_scheme_property(storage: str) -> None: + scheme = urlparse(storage).scheme or "file" + + with from_url(url=storage) as fs: + p = PurePath(fs=fs) + assert p.scheme == scheme + + q = Path(fs=fs) + assert q.scheme == scheme + + def test_unlink(storage: str) -> None: with from_url(url=storage) as fs: target = Path("my", fs=fs) diff --git a/tests/v2_tests/test_s3.py b/tests/v2_tests/test_s3.py index 31834bc4..9ac03b78 100644 --- a/tests/v2_tests/test_s3.py +++ b/tests/v2_tests/test_s3.py @@ -54,6 +54,11 @@ def test_s3_repr_str(s3_fixture): str(s3) +def test_scheme(s3_fixture): + with from_url('s3://test-bucket/base', **s3_fixture.aws_kwargs) as s3: + assert s3.scheme == "s3" + + def test_s3_files(s3_fixture): with from_url('s3://test-bucket/base', **s3_fixture.aws_kwargs) as s3: diff --git a/tests/v2_tests/test_s3_zip.py b/tests/v2_tests/test_s3_zip.py index c7857aa8..77fd430d 100644 --- a/tests/v2_tests/test_s3_zip.py +++ b/tests/v2_tests/test_s3_zip.py @@ -34,6 +34,7 @@ def test_s3_zip(): with from_url('s3://{}/test.zip'.format(bucket)) as z: assert isinstance(z, Zip) assert isinstance(z.fileobj, io.BufferedReader) + assert z.scheme == "s3" assert zipfile.is_zipfile(z.fileobj) with z.open('file', 'rb') as fp: @@ -42,6 +43,7 @@ def test_s3_zip(): with from_url('s3://{}/test.zip'.format(bucket), buffering=0) as z: assert isinstance(z, Zip) + assert z.scheme == "s3" assert 'buffering' in z.kwargs assert isinstance(z.fileobj, pfio.v2.s3._ObjectReader) diff --git a/tests/v2_tests/test_zip.py b/tests/v2_tests/test_zip.py index adbb55f8..ebdf84b7 100644 --- a/tests/v2_tests/test_zip.py +++ b/tests/v2_tests/test_zip.py @@ -119,6 +119,10 @@ def test_repr_str(self): repr(z) str(z) + def test_scheme(self): + with local.open_zip(self.zip_file_path) as z: + assert z.scheme == local.scheme + def test_read_bytes(self): with local.open_zip(os.path.abspath(self.zip_file_path)) as z: with z.open(self.zipped_file_path, "rb") as zipped_file: