Skip to content

Commit 891bd2f

Browse files
committed
fix: harden path inference with root guard and strict validation
- Add root-level input/ guard in _infer_data_dir_and_name to prevent parent.parent from yielding '/' or '.' (defense-in-depth, fixes #93) - Replace UserWarning with ValueError for data_dir ending in 'input/' since path doubling is always a bug, not a recoverable state (#94) - Parametrize nested input dir test with INPUT variant and add result.input assertion in test_io_utils (#95) Fixes #93, fixes #94, fixes #95
1 parent 378ab8c commit 891bd2f

File tree

3 files changed

+42
-29
lines changed

3 files changed

+42
-29
lines changed

src/simple_resume/shell/generate/core.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from __future__ import annotations
1313

1414
import time
15-
import warnings
1615
from collections.abc import Iterable, Sequence
1716
from dataclasses import dataclass
1817
from pathlib import Path
@@ -553,12 +552,10 @@ def _infer_data_dir_and_name(
553552
if data_dir is not None:
554553
base_dir = Path(data_dir)
555554
if base_dir.name.lower() == "input":
556-
warnings.warn(
555+
raise ValueError(
557556
f"data_dir '{base_dir}' ends in 'input/'. Downstream path "
558-
"resolution appends '/input' automatically, which may cause "
559-
"path doubling. Consider passing the parent directory instead.",
560-
UserWarning,
561-
stacklevel=2,
557+
"resolution appends '/input' automatically, which causes "
558+
"path doubling. Pass the parent directory instead."
562559
)
563560
if source_path.exists() and source_path.is_dir():
564561
return source_path, None
@@ -572,7 +569,13 @@ def _infer_data_dir_and_name(
572569
if source_path.suffix.lower() in _YAML_SUFFIXES:
573570
parent = source_path.parent
574571
if parent.name.lower() == "input":
575-
return parent.parent, source_path.stem
572+
grandparent = parent.parent
573+
if grandparent == Path("/") or grandparent == Path("."):
574+
raise ValueError(
575+
f"Cannot infer data_dir: '{source_path}' is in a root-level "
576+
"'input/' directory. Provide data_dir explicitly."
577+
)
578+
return grandparent, source_path.stem
576579
return parent, source_path.stem
577580

578581
raise ValueError(

tests/unit/shell/test_infer_data_dir.py

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from __future__ import annotations
1010

11-
import warnings
1211
from pathlib import Path
1312

1413
import pytest
@@ -44,19 +43,20 @@ def test_yaml_extensions_in_input_dir_return_grandparent(
4443
assert data_dir == tmp_path
4544
assert name == expected_stem
4645

46+
@pytest.mark.parametrize("dirname", ["input", "INPUT"])
4747
def test_yaml_in_nested_input_dir_returns_grandparent(
48-
self, tmp_path: Path, story: Scenario
48+
self, tmp_path: Path, story: Scenario, dirname: str
4949
) -> None:
50-
story.given("a YAML file inside a deeply nested input/ subdirectory")
51-
nested = tmp_path / "projects" / "resumes" / "input"
50+
story.given(f"a YAML file inside a deeply nested {dirname}/ subdirectory")
51+
nested = tmp_path / "projects" / "resumes" / dirname
5252
nested.mkdir(parents=True)
5353
yaml_file = nested / "resume.yaml"
5454
yaml_file.touch()
5555

5656
story.when("inferring data_dir without an explicit override")
5757
data_dir, name = _infer_data_dir_and_name(yaml_file, data_dir=None)
5858

59-
story.then("data_dir points to the grandparent of input/")
59+
story.then("data_dir points to the grandparent of the input/ directory")
6060
assert data_dir == tmp_path / "projects" / "resumes"
6161
assert name == "resume"
6262

@@ -181,10 +181,10 @@ def test_explicit_data_dir_with_nonexistent_yaml(
181181

182182

183183
class TestExplicitDataDirEndingInInput:
184-
"""Warn when explicit data_dir ends in ``input/`` (path-doubling risk)."""
184+
"""Raise ValueError when data_dir ends in ``input/`` (path-doubling)."""
185185

186186
@pytest.mark.parametrize("dirname", ["input", "Input", "INPUT", "iNpUt"])
187-
def test_warns_when_data_dir_named_input(
187+
def test_raises_when_data_dir_named_input(
188188
self, tmp_path: Path, story: Scenario, dirname: str
189189
) -> None:
190190
story.given(f"an explicit data_dir whose basename is '{dirname}'")
@@ -194,17 +194,11 @@ def test_warns_when_data_dir_named_input(
194194
yaml_file.touch()
195195

196196
story.when("inferring with that data_dir")
197-
with warnings.catch_warnings(record=True) as caught:
198-
warnings.simplefilter("always")
199-
data_dir, name = _infer_data_dir_and_name(yaml_file, data_dir=input_dir)
200-
201-
story.then("a UserWarning about path-doubling is emitted regardless of case")
202-
assert len(caught) == 1
203-
assert "path doubling" in str(caught[0].message).lower()
204-
assert data_dir == input_dir
205-
assert name == "sample"
197+
story.then("a ValueError about path-doubling is raised regardless of case")
198+
with pytest.raises(ValueError, match="path doubling"):
199+
_infer_data_dir_and_name(yaml_file, data_dir=input_dir)
206200

207-
def test_no_warning_for_normal_data_dir(
201+
def test_no_error_for_normal_data_dir(
208202
self, tmp_path: Path, story: Scenario
209203
) -> None:
210204
story.given("an explicit data_dir with a normal name")
@@ -214,12 +208,11 @@ def test_no_warning_for_normal_data_dir(
214208
yaml_file.touch()
215209

216210
story.when("inferring with that data_dir")
217-
with warnings.catch_warnings(record=True) as caught:
218-
warnings.simplefilter("always")
219-
_infer_data_dir_and_name(yaml_file, data_dir=custom_dir)
211+
data_dir, name = _infer_data_dir_and_name(yaml_file, data_dir=custom_dir)
220212

221-
story.then("no warning is emitted")
222-
assert len(caught) == 0
213+
story.then("no error is raised and data_dir is used as-is")
214+
assert data_dir == custom_dir
215+
assert name == "sample"
223216

224217

225218
class TestRootLevelInputGuard:
@@ -251,6 +244,22 @@ def test_root_level_input_raises(self, story: Scenario) -> None:
251244
with pytest.raises(ValueError, match="Unable to infer data_dir"):
252245
_infer_data_dir_and_name("/input/resume.yaml", data_dir=None)
253246

247+
def test_relative_input_dir_guard(
248+
self, tmp_path: Path, story: Scenario, monkeypatch: pytest.MonkeyPatch
249+
) -> None:
250+
story.given("a YAML at relative input/resume.yaml (grandparent is '.')")
251+
input_dir = tmp_path / "input"
252+
input_dir.mkdir()
253+
yaml_file = input_dir / "resume.yaml"
254+
yaml_file.touch()
255+
256+
monkeypatch.chdir(tmp_path)
257+
258+
story.when("inferring data_dir via the relative path")
259+
story.then("ValueError is raised because grandparent resolves to '.'")
260+
with pytest.raises(ValueError, match="root-level"):
261+
_infer_data_dir_and_name(Path("input/resume.yaml"), data_dir=None)
262+
254263

255264
class TestInferDataDirErrorCases:
256265
"""Invalid inputs raise ValueError."""

tests/unit/shell/test_io_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ def test_handles_candidate_in_case_variant_input_dir(
101101

102102
result = resolve_paths_for_read(None, {}, candidate)
103103
assert result.data == tmp_path
104+
assert result.input == input_dir
104105

105106
def test_root_level_input_dir_raises(self) -> None:
106107
"""Reject candidate in root-level input/ where grandparent is /."""

0 commit comments

Comments
 (0)