diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 6febff0ae6..ec2147c4c5 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -351,6 +351,10 @@ def new_input(self, location: str) -> FsspecInputFile: FsspecInputFile: An FsspecInputFile instance for the given location. """ uri = urlparse(location) + if uri.scheme in ("", "file"): + path_to_check = uri.path if uri.scheme else location + if not os.path.isabs(path_to_check): + raise ValueError(f"FileIO implementation for local files requires absolute paths: {location}") fs = self.get_fs(uri.scheme) return FsspecInputFile(location=location, fs=fs) @@ -364,6 +368,10 @@ def new_output(self, location: str) -> FsspecOutputFile: FsspecOutputFile: An FsspecOutputFile instance for the given location. """ uri = urlparse(location) + if uri.scheme in ("", "file"): + path_to_check = uri.path if uri.scheme else location + if not os.path.isabs(path_to_check): + raise ValueError(f"FileIO implementation for local files requires absolute paths: {location}") fs = self.get_fs(uri.scheme) return FsspecOutputFile(location=location, fs=fs) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 2797371028..bbbf0442e9 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -391,7 +391,13 @@ def parse_location(location: str) -> Tuple[str, str, str]: """Return the path without the scheme.""" uri = urlparse(location) if not uri.scheme: - return "file", uri.netloc, os.path.abspath(location) + if not os.path.isabs(location): + raise ValueError(f"FileIO implementation for local files requires absolute paths: {location}") + return "file", uri.netloc, location + elif uri.scheme == "file": + if not os.path.isabs(uri.path): + raise ValueError(f"FileIO implementation for local files requires absolute paths: {location}") + return uri.scheme, uri.netloc, uri.path elif uri.scheme in ("hdfs", "viewfs"): return uri.scheme, uri.netloc, uri.path else: diff --git a/tests/io/test_io.py b/tests/io/test_io.py index ac1d7b4fe4..8896a3355c 100644 --- a/tests/io/test_io.py +++ b/tests/io/test_io.py @@ -170,7 +170,6 @@ def test_output_file_to_input_file() -> None: @pytest.mark.parametrize( "string_uri", [ - "foo/bar/baz.parquet", "file:/foo/bar/baz.parquet", "file:/foo/bar/baz.parquet", ], @@ -185,6 +184,16 @@ def test_custom_file_io_locations(string_uri: str) -> None: output_file = file_io.new_output(location=string_uri) assert output_file.location == string_uri +def test_custom_file_io_location_relative_path() -> None: + string_uri = "foo/bar/baz.parquet" + # Instantiate the file-io and create a new input and output file + file_io = PyArrowFileIO() + with pytest.raises(ValueError) as exc_info: + file_io.new_input(location=string_uri) + + assert "FileIO implementation for local files requires absolute paths" in str(exc_info.value) + + def test_deleting_local_file_using_file_io() -> None: """Test deleting a local file using FileIO.delete(...)"""