Skip to content
Open
136 changes: 123 additions & 13 deletions tests/test_flow_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ async def test_remote_repository_analysis(request: pytest.FixtureRequest) -> Non
"""Test the complete flow of analyzing a remote repository."""
client = request.getfixturevalue("test_client")
form_data = {
"input_text": "https://github.com/octocat/Hello-World",
"max_file_size": "243",
"input_text": "https://github.com/pallets/flask",
"max_file_size": "200",
"pattern_type": "exclude",
"pattern": "",
"token": "",
Expand All @@ -75,7 +75,7 @@ async def test_invalid_repository_url(request: pytest.FixtureRequest) -> None:
client = request.getfixturevalue("test_client")
form_data = {
"input_text": "https://github.com/nonexistent/repo",
"max_file_size": "243",
"max_file_size": "200",
"pattern_type": "exclude",
"pattern": "",
"token": "",
Expand All @@ -92,12 +92,11 @@ async def test_invalid_repository_url(request: pytest.FixtureRequest) -> None:

@pytest.mark.asyncio
async def test_large_repository(request: pytest.FixtureRequest) -> None:
"""Simulate analysis of a large repository with nested folders."""
"""Simulate analysis of a large repository with nested folders and many files."""
client = request.getfixturevalue("test_client")
# TODO: ingesting a large repo takes too much time (e.g. the godotengine/godot repository)
form_data = {
"input_text": "https://github.com/octocat/hello-world",
"max_file_size": "10",
"input_text": "https://github.com/pallets/flask",
"max_file_size": "100", # Lower this to force skipping large files
"pattern_type": "exclude",
"pattern": "",
"token": "",
Expand All @@ -109,7 +108,7 @@ async def test_large_repository(request: pytest.FixtureRequest) -> None:
response_data = response.json()
if response.status_code == status.HTTP_200_OK:
assert "content" in response_data
assert response_data["content"]
assert isinstance(response_data["content"], str)
else:
assert "error" in response_data

Expand All @@ -121,8 +120,8 @@ async def test_concurrent_requests(request: pytest.FixtureRequest) -> None:

def make_request() -> None:
form_data = {
"input_text": "https://github.com/octocat/hello-world",
"max_file_size": "243",
"input_text": "https://github.com/pallets/flask",
"max_file_size": "200",
"pattern_type": "exclude",
"pattern": "",
"token": "",
Expand All @@ -148,7 +147,7 @@ async def test_large_file_handling(request: pytest.FixtureRequest) -> None:
"""Test handling of repositories with large files."""
client = request.getfixturevalue("test_client")
form_data = {
"input_text": "https://github.com/octocat/Hello-World",
"input_text": "https://github.com/pallets/flask",
"max_file_size": "1",
"pattern_type": "exclude",
"pattern": "",
Expand All @@ -171,8 +170,8 @@ async def test_repository_with_patterns(request: pytest.FixtureRequest) -> None:
"""Test repository analysis with include/exclude patterns."""
client = request.getfixturevalue("test_client")
form_data = {
"input_text": "https://github.com/octocat/Hello-World",
"max_file_size": "243",
"input_text": "https://github.com/pallets/flask",
"max_file_size": "200",
"pattern_type": "include",
"pattern": "*.md",
"token": "",
Expand All @@ -184,9 +183,120 @@ async def test_repository_with_patterns(request: pytest.FixtureRequest) -> None:
response_data = response.json()
if response.status_code == status.HTTP_200_OK:
assert "content" in response_data
assert isinstance(response_data["content"], str)

assert "repo_url" in response_data
assert response_data["repo_url"].startswith("https://github.com/")

assert "summary" in response_data
assert isinstance(response_data["summary"], str)
assert "pallets/flask" in response_data["summary"].lower()

assert "tree" in response_data
assert isinstance(response_data["tree"], str)
assert "pallets-flask" in response_data["tree"].lower()

assert "pattern_type" in response_data
assert response_data["pattern_type"] == "include"

assert "pattern" in response_data
assert response_data["pattern"] == "*.md"
else:
assert "error" in response_data
assert isinstance(response_data["error"], str)
assert response_data["error"]


@pytest.mark.asyncio
async def test_missing_required_fields(request: pytest.FixtureRequest) -> None:
    """Check the ``/api/ingest`` status code when a form field is left out.

    Two payloads are posted in turn: the first omits ``input_text``, the
    second omits ``pattern_type``.
    """
    client = request.getfixturevalue("test_client")

    # Case 1: payload has no "input_text" key.
    without_input_text = {
        "max_file_size": "200",
        "pattern_type": "exclude",
        "pattern": "",
        "token": "",
    }
    reply = client.post("/api/ingest", json=without_input_text)
    accepted = (
        status.HTTP_422_UNPROCESSABLE_ENTITY,
        status.HTTP_429_TOO_MANY_REQUESTS,
    )
    assert reply.status_code in accepted

    # Case 2: payload has no "pattern_type" key; a 200 is tolerated here too.
    without_pattern_type = {
        "input_text": "https://github.com/pallets/flask",
        "max_file_size": "200",
        "pattern": "",
        "token": "",
    }
    reply = client.post("/api/ingest", json=without_pattern_type)
    accepted = (
        status.HTTP_422_UNPROCESSABLE_ENTITY,
        status.HTTP_429_TOO_MANY_REQUESTS,
        status.HTTP_200_OK,
    )
    assert reply.status_code in accepted


@pytest.mark.asyncio
async def test_invalid_field_types(request: pytest.FixtureRequest) -> None:
    """Check that wrongly typed fields posted to ``/api/ingest`` yield a 422.

    The first payload sends an integer as ``input_text``; the second sends a
    list as ``pattern``.
    """
    client = request.getfixturevalue("test_client")

    wrongly_typed_payloads = (
        # "input_text" is an int instead of a string.
        {
            "input_text": 12345,
            "max_file_size": "200",
            "pattern_type": "exclude",
            "pattern": "",
            "token": "",
        },
        # "pattern" is a list instead of a string.
        {
            "input_text": "https://github.com/pallets/flask",
            "max_file_size": "200",
            "pattern_type": "exclude",
            "pattern": ["*.md"],
            "token": "",
        },
    )

    # Payloads are sent in order; the first unexpected status aborts the test.
    for payload in wrongly_typed_payloads:
        reply = client.post("/api/ingest", json=payload)
        assert reply.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY


@pytest.mark.asyncio
async def test_unsupported_pattern_type(request: pytest.FixtureRequest) -> None:
    """Check that an unknown ``pattern_type`` value is rejected by ``/api/ingest``.

    The response must be a 400 or a 422, and its JSON body must carry either
    an ``error`` or a ``detail`` key.
    """
    client = request.getfixturevalue("test_client")
    payload = {
        "input_text": "https://github.com/pallets/flask",
        "max_file_size": "200",
        "pattern_type": "invalid_type",
        "pattern": "*.md",
        "token": "",
    }

    reply = client.post("/api/ingest", json=payload)
    rejection_codes = (status.HTTP_400_BAD_REQUEST, status.HTTP_422_UNPROCESSABLE_ENTITY)
    assert reply.status_code in rejection_codes

    body = reply.json()
    assert any(key in body for key in ("error", "detail"))


@pytest.mark.asyncio
async def test_invalid_token(request: pytest.FixtureRequest) -> None:
    """Check how ``/api/ingest`` answers when given a bogus access token.

    A 401, 400 or 429 is accepted, and the JSON body must carry either an
    ``error`` or a ``detail`` key.
    """
    client = request.getfixturevalue("test_client")
    payload = {
        "input_text": "https://github.com/pallets/flask",
        "max_file_size": "200",
        "pattern_type": "exclude",
        "pattern": "",
        "token": "invalid_token_1234567890",
    }
    response = client.post("/api/ingest", json=payload)

    # Several status codes are tolerated for a rejected token.
    tolerated_codes = (
        status.HTTP_401_UNAUTHORIZED,
        status.HTTP_400_BAD_REQUEST,
        status.HTTP_429_TOO_MANY_REQUESTS,
    )
    assert response.status_code in tolerated_codes, f"Unexpected status code: {response.status_code}"

    body = response.json()
    assert any(key in body for key in ("error", "detail"))
27 changes: 27 additions & 0 deletions tests/test_git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,30 @@ def test_create_git_command_ignores_non_github_urls(
# Should only have base command and -C option, no auth headers
expected = [*base_cmd, "-C", local_path]
assert cmd == expected


@pytest.mark.parametrize(
    "url",
    [
        "",
        "not-a-url",
        "ftp://github.com/owner/repo.git",
        "github.com/owner/repo.git",
        "https://",
    ],
)
def test_is_github_host_edge_cases(url: str) -> None:
    """Check that ``is_github_host`` returns a bool for malformed or odd URLs.

    A ``ValueError`` or ``TypeError`` raised by the call is reported as an
    explicit test failure naming the exception class and the offending URL.
    """
    try:
        outcome = is_github_host(url)
    except (ValueError, TypeError) as exc:
        pytest.fail(f"is_github_host raised {exc.__class__.__name__} for url: {url}")
    else:
        # Only reached when the call returned normally.
        assert isinstance(outcome, bool)


def test_token_not_in_command_plaintext() -> None:
    """Check that the raw token does not show up in the generated git command.

    Command parts that contain the substring ``"Basic"`` are exempt from the
    check; every other part must not contain the token verbatim.
    """
    token = "ghp_" + "x" * 36
    cmd = create_git_command(["git", "clone"], "/tmp", "https://github.com/owner/repo.git", token)

    # A part "leaks" when it holds the raw token and is not a "Basic" part.
    leaking_parts = [part for part in cmd if token in part and "Basic" not in part]
    assert not leaking_parts
Loading