From dab978af6ac32d7a408fdf6e18756bea0b88f845 Mon Sep 17 00:00:00 2001 From: Rodion Mostovoi <36400912+rodion-m@users.noreply.github.com> Date: Wed, 1 Jul 2026 16:21:26 +0500 Subject: [PATCH 1/3] Switch MCP tools to Tool API v3 --- README.md | 25 +- manifest.json | 30 +- pyproject.toml | 2 +- server.json | 26 +- src/codealive_mcp_server.py | 65 +- src/tests/test_artifact_relationships.py | 577 ------- src/tests/test_chat_tool.py | 306 ---- src/tests/test_datasources.py | 356 ----- src/tests/test_e2e_tools.py | 1604 -------------------- src/tests/test_error_handling.py | 6 +- src/tests/test_fetch_artifacts.py | 794 ---------- src/tests/test_observability_middleware.py | 4 +- src/tests/test_response_transformer.py | 4 +- src/tests/test_search_tool.py | 197 --- src/tests/test_stdio_smoke.py | 56 +- src/tests/test_tool_api_v3.py | 174 +++ src/tests/test_tool_metadata.py | 9 +- src/tools/__init__.py | 19 +- src/tools/artifact_query.py | 32 + src/tools/artifact_relationships.py | 389 +---- src/tools/chat.py | 340 +---- src/tools/datasources.py | 242 +-- src/tools/fetch_artifacts.py | 362 +---- src/tools/repository.py | 49 + src/tools/search.py | 444 +----- src/tools/tool_api.py | 91 ++ src/utils/errors.py | 14 +- 27 files changed, 605 insertions(+), 5612 deletions(-) delete mode 100644 src/tests/test_artifact_relationships.py delete mode 100644 src/tests/test_chat_tool.py delete mode 100644 src/tests/test_datasources.py delete mode 100644 src/tests/test_e2e_tools.py delete mode 100644 src/tests/test_fetch_artifacts.py delete mode 100644 src/tests/test_search_tool.py create mode 100644 src/tests/test_tool_api_v3.py create mode 100644 src/tools/artifact_query.py create mode 100644 src/tools/repository.py create mode 100644 src/tools/tool_api.py diff --git a/README.md b/README.md index 304bf72..d0b3381 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,14 @@ Once connected, you'll have access to these powerful tools: 1. **`get_data_sources`** - List your indexed repositories and workspaces 2. **`semantic_search`** - Canonical semantic search across indexed artifacts 3. **`grep_search`** - Exact literal or regex text search inside file content, plus literal file-name/path matching (returns files like `Form.xml` even when their content never mentions the name), with line-level previews for content matches -4. **`fetch_artifacts`** - Load the full source for relevant search hits (missing or inaccessible identifiers are reported back in a `` block, not silently dropped) -5. **`get_artifact_relationships`** - Expand call graph, inheritance, and reference relationships for one artifact -6. **`chat`** - Slower synthesized codebase Q&A, typically only after search -7. **`codebase_search`** - Deprecated legacy semantic search alias kept for backward compatibility -8. **`codebase_consultant`** - Deprecated alias for `chat` +4. **`get_repository_ontology`** - Get repository-level orientation for one selected repository +5. **`get_file_tree`** - Inspect a bounded file tree for one repository +6. **`read_file`** - Read a repository-relative file path, optionally with a line range +7. **`fetch_artifacts`** - Load the full source for relevant search hits (missing or inaccessible identifiers are reported back, not silently dropped) +8. **`get_artifact_relationships`** - Expand call graph, inheritance, and reference relationships for one artifact +9. **`get_artifact_query_schema`** - Inspect supported ArtifactQuery entities, fields, and examples +10. **`query_artifact_metadata`** - Run read-only metadata analytics across selected repositories +11. **`chat`** - Stateless, slower synthesized codebase Q&A; call only when explicitly requested ## 🎯 Usage Examples @@ -43,7 +46,7 @@ After setup, try these commands with your AI assistant: - *"Find the exact regex that matches JWT tokens"* → Uses `grep_search` - *"Explain how the payment flow works in this codebase"* → Usually starts with `semantic_search`/`grep_search`, then optionally uses `chat` -`semantic_search` and `grep_search` should be the default tools for most agents. `chat` is a slower synthesis fallback, can take up to 30 seconds, and is usually unnecessary when an agent can run a multi-step workflow with search, fetch, relationships, and local file reads. If your agent supports subagents, the highest-confidence path is to delegate a focused subagent that orchestrates `semantic_search` and `grep_search` first. +`semantic_search` and `grep_search` should be the default tools for most agents. `chat` is a slower stateless synthesis fallback, can take up to 30 seconds, and is usually unnecessary when an agent can run a multi-step workflow with ontology, search, fetch/read, relationships, ArtifactQuery, and local file reads. If your agent supports subagents, the highest-confidence path is to delegate a focused subagent that orchestrates `semantic_search` and `grep_search` first. ## 📚 Agent Skill @@ -840,10 +843,14 @@ See [JetBrains MCP Documentation](https://www.jetbrains.com/help/ai-assistant/mc - `get_data_sources` - List available repositories - `semantic_search` - Search code semantically - `grep_search` - Search by exact text or regex + - `get_repository_ontology` - Orient around one repository + - `get_file_tree` - Inspect repository files + - `read_file` - Read one repository-relative file + - `fetch_artifacts` - Fetch source for search result identifiers - `get_artifact_relationships` - Expand relationships for one artifact - - `chat` - Slower synthesized codebase Q&A, usually after search - - `codebase_search` - Legacy semantic search alias - - `codebase_consultant` - Deprecated alias for `chat` + - `get_artifact_query_schema` - Inspect metadata query schema + - `query_artifact_metadata` - Run metadata analytics + - `chat` - Stateless synthesized codebase Q&A, only when explicitly requested **Example Workflow:** ``` diff --git a/manifest.json b/manifest.json index 63afca3..ab29b02 100644 --- a/manifest.json +++ b/manifest.json @@ -2,7 +2,7 @@ "manifest_version": "0.4", "name": "codealive-mcp", "display_name": "CodeAlive", - "version": "2.0.4", + "version": "3.0.0", "description": "Semantic code search and codebase Q&A for Claude Desktop using your CodeAlive account or self-hosted deployment.", "long_description": "CodeAlive gives Claude Desktop access to semantic code search, artifact fetch, repository discovery, and architecture-aware codebase Q&A. This extension runs locally via MCP and supports both CodeAlive Cloud and self-hosted deployments.", "author": { @@ -53,10 +53,6 @@ "name": "get_data_sources", "description": "List indexed repositories and workspaces that are ready for search and chat." }, - { - "name": "codebase_search", - "description": "Deprecated legacy semantic search tool kept for backward compatibility." - }, { "name": "semantic_search", "description": "Default discovery tool — search by meaning to find code by concepts, behavior, or architecture." @@ -70,16 +66,32 @@ "description": "Synthesized codebase Q&A. Do NOT call unless the user explicitly names this tool (e.g. 'use chat'). 'Ask CodeAlive' means use search tools, not chat. Slow (up to 30 seconds)." }, { - "name": "fetch_artifacts", - "description": "Fetch full source for specific search results when you need the underlying code." + "name": "get_repository_ontology", + "description": "Get repository-level ontology and orientation for a single selected repository." + }, + { + "name": "get_file_tree", + "description": "List repository files and folders for a single selected repository." + }, + { + "name": "read_file", + "description": "Read a repository-relative file path, optionally bounded by line range." }, { - "name": "codebase_consultant", - "description": "Deprecated alias for chat kept for backward compatibility." + "name": "fetch_artifacts", + "description": "Fetch full source for specific search results when you need the underlying code." }, { "name": "get_artifact_relationships", "description": "Inspect relationships between artifacts returned by CodeAlive search." + }, + { + "name": "get_artifact_query_schema", + "description": "Return supported ArtifactQuery entities, fields, operators, and examples." + }, + { + "name": "query_artifact_metadata", + "description": "Run read-only ArtifactQuery metadata analytics across selected repositories." } ], "user_config": { diff --git a/pyproject.toml b/pyproject.toml index 3f15e35..bf1bdba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ packages = ["src"] package-dir = {"" = "."} [tool.setuptools_scm] -fallback_version = "2.0.4" +fallback_version = "3.0.0" [tool.uv] # Relative dates in exclude-newer (e.g. "7 days") require uv ≥ 0.11. diff --git a/server.json b/server.json index 845c773..0f64d3c 100644 --- a/server.json +++ b/server.json @@ -1,7 +1,7 @@ { "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", "name": "io.github.CodeAlive-AI/codealive-mcp", - "version": "2.0.4", + "version": "3.0.0", "description": "Semantic code search and analysis from CodeAlive for AI assistants and agents.", "keywords": [ "context-engineering", @@ -54,10 +54,6 @@ "name": "get_data_sources", "description": "Retrieve all available repositories and workspaces indexed in your CodeAlive account. Use this first to discover what codebases you can search and analyze." }, - { - "name": "codebase_search", - "description": "Deprecated legacy semantic search tool retained for backward compatibility." - }, { "name": "semantic_search", "description": "Default discovery tool — search by meaning to find code by concepts, behavior, or architecture." @@ -71,8 +67,16 @@ "description": "Synthesized codebase Q&A. Do NOT call unless the user explicitly names this tool (e.g. 'use chat'). 'Ask CodeAlive' means use search tools, not chat. Slow (up to 30 seconds)." }, { - "name": "codebase_consultant", - "description": "Deprecated alias for chat retained for backward compatibility." + "name": "get_repository_ontology", + "description": "Get repository-level ontology and orientation for a single selected repository." + }, + { + "name": "get_file_tree", + "description": "List repository files and folders for a single selected repository." + }, + { + "name": "read_file", + "description": "Read a repository-relative file path, optionally bounded by line range." }, { "name": "fetch_artifacts", @@ -81,6 +85,14 @@ { "name": "get_artifact_relationships", "description": "Explore an artifact's relationships — call graph, inheritance hierarchy, or references. Drill down after search or fetch to understand how code connects across the codebase." + }, + { + "name": "get_artifact_query_schema", + "description": "Return supported ArtifactQuery entities, fields, operators, and examples." + }, + { + "name": "query_artifact_metadata", + "description": "Run read-only ArtifactQuery metadata analytics across selected repositories." } ] } diff --git a/src/codealive_mcp_server.py b/src/codealive_mcp_server.py index 6c597f3..54f6e05 100644 --- a/src/codealive_mcp_server.py +++ b/src/codealive_mcp_server.py @@ -30,14 +30,17 @@ import core.client as _client_module # for /ready flag access from middleware import N8NRemoveParametersMiddleware, ObservabilityMiddleware from tools import ( - chat, - codebase_consultant, - codebase_search, - fetch_artifacts, - get_artifact_relationships, get_data_sources, - grep_search, semantic_search, + grep_search, + get_repository_ontology, + get_file_tree, + read_file, + fetch_artifacts, + get_artifact_relationships, + get_artifact_query_schema, + query_artifact_metadata, + chat, ) # Initialize FastMCP server with lifespan and enhanced system instructions @@ -55,19 +58,18 @@ Default workflow (used for ALL tasks unless the user explicitly requests `chat`): 1. First use `get_data_sources` to identify available repositories and workspaces - 2. Use `semantic_search` for natural-language retrieval by meaning - 3. Use `grep_search` for literal string or regex matching when the pattern matters - 4. To get full content: + 2. Use `get_repository_ontology` for high-level orientation when exactly one repository is in scope + 3. Use `semantic_search` for natural-language retrieval by meaning + 4. Use `grep_search` for literal string or regex matching when the pattern matters + 5. To get full content: - For repos in your working directory: use `Read()` on the local files - - For external repos: use `fetch_artifacts` with identifiers from search results - 5. Use `get_artifact_relationships` or `fetch_artifacts` to drill into the most relevant hits - 6. If your environment supports subagents and you need the highest reliability or depth, - prefer an agentic workflow where a subagent combines `semantic_search`, `grep_search`, - artifact fetches, relationship inspection, and local file reads + - For external repos: use `fetch_artifacts` with identifiers from search results or `read_file` + 6. Use `get_artifact_relationships` for graph expansion and `query_artifact_metadata` + for aggregate metadata analytics after checking `get_artifact_query_schema` User-invoked tool — `chat`: - `chat` is disabled by default. Call it ONLY when the user has explicitly - named the tool (e.g. "use chat", "use codebase_consultant", "call the chat tool"). + named the tool (e.g. "use chat", "call the chat tool"). Phrases like "ask CodeAlive" or "search CodeAlive" do NOT qualify — they refer to CodeAlive tools in general (semantic_search, grep_search, etc.). - For every other case — lookups, architecture understanding, debugging, @@ -80,11 +82,12 @@ - Use `grep_search(regex=false)` for exact strings and `grep_search(regex=true)` for regex patterns - Use specific function/class names or file path scopes when looking for particular implementations - Treat `semantic_search` and `grep_search` as the default discovery tools - - Prefer `semantic_search` over the deprecated `codebase_search` legacy alias + - MCP v3 exposes only Tool API v3 tools; deprecated aliases are intentionally absent - Use `get_artifact_relationships` only with exact artifact identifiers from prior search/fetch results. It expands a known artifact's relationship graph; it does not search by path, class name, or guessed symbol. For exact source code, call `fetch_artifacts` on identifiers returned by search or relationships. - - Remember that context from previous messages is maintained in the same conversation + - `chat` is stateless in v3. Include prior findings, artifact identifiers, + assumptions, scope, and constraints in each question. Flexible data source usage: - You can use a workspace name as a single data source to search or chat across all its repositories at once @@ -160,10 +163,6 @@ async def readiness_check(request: Request) -> JSONResponse: title="List Data Sources", annotations=_READ_ONLY_TOOL, )(get_data_sources) -mcp.tool( - title="Search Codebase (Deprecated)", - annotations=_READ_ONLY_TOOL, -)(codebase_search) mcp.tool( title="Semantic Search", annotations=_READ_ONLY_TOOL, @@ -173,9 +172,17 @@ async def readiness_check(request: Request) -> JSONResponse: annotations=_READ_ONLY_TOOL, )(grep_search) mcp.tool( - title="Chat About Codebase", + title="Get Repository Ontology", annotations=_READ_ONLY_TOOL, -)(chat) +)(get_repository_ontology) +mcp.tool( + title="Get File Tree", + annotations=_READ_ONLY_TOOL, +)(get_file_tree) +mcp.tool( + title="Read File", + annotations=_READ_ONLY_TOOL, +)(read_file) mcp.tool( title="Fetch Artifacts", annotations=_READ_ONLY_TOOL, @@ -185,9 +192,17 @@ async def readiness_check(request: Request) -> JSONResponse: annotations=_READ_ONLY_TOOL, )(get_artifact_relationships) mcp.tool( - title="Consult Codebase (Deprecated)", + title="Get ArtifactQuery Schema", annotations=_READ_ONLY_TOOL, -)(codebase_consultant) +)(get_artifact_query_schema) +mcp.tool( + title="Query Artifact Metadata", + annotations=_READ_ONLY_TOOL, +)(query_artifact_metadata) +mcp.tool( + title="Chat About Codebase", + annotations=_READ_ONLY_TOOL, +)(chat) def main(): diff --git a/src/tests/test_artifact_relationships.py b/src/tests/test_artifact_relationships.py deleted file mode 100644 index ef2a977..0000000 --- a/src/tests/test_artifact_relationships.py +++ /dev/null @@ -1,577 +0,0 @@ -"""Tests for the get_artifact_relationships tool.""" - -import pytest -from unittest.mock import AsyncMock, MagicMock, patch - -from fastmcp import Context -from fastmcp.exceptions import ToolError - -from tools.artifact_relationships import ( - PROFILE_MAP, - _build_relationships_dict, - get_artifact_relationships, -) - - -class TestProfileMapping: - """Test MCP profile names map to backend enum values.""" - - def test_default_profile_is_calls_only(self): - """callsOnly is the default and maps to CallsOnly.""" - assert PROFILE_MAP["callsOnly"] == "CallsOnly" - - def test_inheritance_only_maps_correctly(self): - assert PROFILE_MAP["inheritanceOnly"] == "InheritanceOnly" - - def test_all_relevant_maps_correctly(self): - assert PROFILE_MAP["allRelevant"] == "AllRelevant" - - def test_references_only_maps_correctly(self): - assert PROFILE_MAP["referencesOnly"] == "ReferencesOnly" - - -class TestBuildRelationshipsDict: - """Test dict shape of relationship responses (FastMCP handles serialization).""" - - def test_found_with_grouped_relationships(self): - data = { - "sourceIdentifier": "org/repo::path::Symbol", - "profile": "CallsOnly", - "found": True, - "availableRelationshipCounts": { - "outgoingCalls": 57, - "incomingCalls": 3, - "ancestors": 0, - "descendants": 2, - "references": 11, - }, - "relationships": [ - { - "relationType": "OutgoingCalls", - "totalCount": 57, - "returnedCount": 50, - "truncated": True, - "items": [ - { - "identifier": "org/repo::path::Dep", - "filePath": "src/Data/Repository.cs", - "startLine": 88, - "shortSummary": "Stores the aggregate", - } - ], - }, - { - "relationType": "IncomingCalls", - "totalCount": 3, - "returnedCount": 3, - "truncated": False, - "items": [ - { - "identifier": "org/repo::path::Caller", - "filePath": "src/Services/Worker.cs", - "startLine": 142, - } - ], - }, - ], - } - - parsed = _build_relationships_dict(data) - assert parsed["sourceIdentifier"] == "org/repo::path::Symbol" - assert parsed["profile"] == "callsOnly" - assert parsed["found"] is True - assert parsed["availableRelationshipCounts"]["outgoingCalls"] == 57 - assert parsed["availableRelationshipCounts"]["references"] == 11 - assert "truncated" in parsed["hint"] - assert "higher max_count_per_type" in parsed["hint"] - - outgoing = parsed["relationships"][0] - assert outgoing["type"] == "outgoing_calls" - assert outgoing["totalCount"] == 57 - assert outgoing["returnedCount"] == 50 - assert outgoing["truncated"] is True - assert outgoing["items"][0]["filePath"] == "src/Data/Repository.cs" - assert outgoing["items"][0]["startLine"] == 88 - assert outgoing["items"][0]["shortSummary"] == "Stores the aggregate" - - incoming = parsed["relationships"][1] - assert incoming["type"] == "incoming_calls" - assert incoming["truncated"] is False - # Incoming call has no shortSummary - assert "shortSummary" not in incoming["items"][0] - - def test_not_found_omits_relationships(self): - data = { - "sourceIdentifier": "org/repo::path::Missing", - "profile": "CallsOnly", - "found": False, - "relationships": [], - } - - parsed = _build_relationships_dict(data) - assert parsed["found"] is False - assert "relationships" not in parsed - assert "availableRelationshipCounts" not in parsed - assert "fresh identifier" in parsed["hint"] - - def test_empty_groups_still_rendered(self): - data = { - "sourceIdentifier": "org/repo::path::Symbol", - "profile": "InheritanceOnly", - "found": True, - "relationships": [ - { - "relationType": "Ancestors", - "totalCount": 0, - "returnedCount": 0, - "truncated": False, - "items": [], - }, - { - "relationType": "Descendants", - "totalCount": 0, - "returnedCount": 0, - "truncated": False, - "items": [], - }, - ], - } - - parsed = _build_relationships_dict(data) - types = [g["type"] for g in parsed["relationships"]] - assert types == ["ancestors", "descendants"] - for g in parsed["relationships"]: - assert g["totalCount"] == 0 - assert g["items"] == [] - assert "No relationships were found for this profile" in parsed["hint"] - - def test_optional_fields_omitted_when_null(self): - data = { - "sourceIdentifier": "org/repo::path::Symbol", - "profile": "CallsOnly", - "found": True, - "relationships": [ - { - "relationType": "OutgoingCalls", - "totalCount": 1, - "returnedCount": 1, - "truncated": False, - "items": [ - { - "identifier": "org/repo::path::Target", - # filePath, startLine, shortSummary all absent - } - ], - }, - ], - } - - parsed = _build_relationships_dict(data) - item = parsed["relationships"][0]["items"][0] - assert item["identifier"] == "org/repo::path::Target" - assert "filePath" not in item - assert "startLine" not in item - assert "shortSummary" not in item - - def test_empty_profile_hint_uses_available_counts(self): - data = { - "sourceIdentifier": "org/repo::path::Command", - "profile": "CallsOnly", - "found": True, - "availableRelationshipCounts": { - "outgoingCalls": 0, - "incomingCalls": 0, - "ancestors": 0, - "descendants": 0, - "references": 7, - }, - "relationships": [ - { - "relationType": "OutgoingCalls", - "totalCount": 0, - "returnedCount": 0, - "truncated": False, - "items": [], - }, - { - "relationType": "IncomingCalls", - "totalCount": 0, - "returnedCount": 0, - "truncated": False, - "items": [], - }, - ], - } - - parsed = _build_relationships_dict(data) - - assert parsed["availableRelationshipCounts"]["references"] == 7 - assert "referencesOnly" in parsed["hint"] - assert "where-used" in parsed["hint"] - - def test_all_relevant_empty_profile_hint_says_references_are_excluded(self): - data = { - "sourceIdentifier": "org/repo::path::Message", - "profile": "AllRelevant", - "found": True, - "availableRelationshipCounts": { - "outgoingCalls": 0, - "incomingCalls": 0, - "ancestors": 0, - "descendants": 0, - "references": 4, - }, - "relationships": [ - { - "relationType": "OutgoingCalls", - "totalCount": 0, - "returnedCount": 0, - "truncated": False, - "items": [], - }, - { - "relationType": "IncomingCalls", - "totalCount": 0, - "returnedCount": 0, - "truncated": False, - "items": [], - }, - { - "relationType": "Ancestors", - "totalCount": 0, - "returnedCount": 0, - "truncated": False, - "items": [], - }, - { - "relationType": "Descendants", - "totalCount": 0, - "returnedCount": 0, - "truncated": False, - "items": [], - }, - ], - } - - parsed = _build_relationships_dict(data) - - assert parsed["profile"] == "allRelevant" - assert "excludes references" in parsed["hint"] - assert "referencesOnly" in parsed["hint"] - - def test_quotes_and_specials_pass_through_unchanged(self): - """Special chars (<, >, &, ") are preserved as-is in the dict — no HTML encoding.""" - data = { - "sourceIdentifier": "org/repo::path::Class", - "profile": "CallsOnly", - "found": True, - "relationships": [ - { - "relationType": "OutgoingCalls", - "totalCount": 1, - "returnedCount": 1, - "truncated": False, - "items": [ - { - "identifier": "org/repo::path::Method", - "shortSummary": 'Returns "value" & more', - } - ], - }, - ], - } - - parsed = _build_relationships_dict(data) - assert parsed["sourceIdentifier"] == "org/repo::path::Class" - assert parsed["relationships"][0]["items"][0]["identifier"] == "org/repo::path::Method" - assert parsed["relationships"][0]["items"][0]["shortSummary"] == 'Returns "value" & more' - - def test_profile_mapped_back_to_mcp_name(self): - """Backend profile enum names are mapped back to MCP-friendly names.""" - for mcp_name, api_name in PROFILE_MAP.items(): - data = { - "sourceIdentifier": "id", - "profile": api_name, - "found": False, - "relationships": [], - } - parsed = _build_relationships_dict(data) - assert parsed["profile"] == mcp_name - - -class TestGetArtifactRelationshipsTool: - """Test the async tool function.""" - - @pytest.mark.asyncio - @patch("tools.artifact_relationships.get_api_key_from_context") - async def test_default_profile_sends_calls_only(self, mock_get_api_key): - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.debug = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = { - "sourceIdentifier": "org/repo::path::Symbol", - "profile": "CallsOnly", - "found": True, - "relationships": [], - } - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_context = MagicMock() - mock_context.client = mock_client - mock_context.base_url = "https://app.codealive.ai" - ctx.request_context.lifespan_context = mock_context - - result = await get_artifact_relationships( - ctx=ctx, - identifier="org/repo::path::Symbol", - ) - - # Verify the API was called with CallsOnly profile - call_args = mock_client.post.call_args - assert call_args[1]["json"]["profile"] == "CallsOnly" - assert result["found"] is True - - @pytest.mark.asyncio - @patch("tools.artifact_relationships.get_api_key_from_context") - async def test_explicit_profile_maps_correctly(self, mock_get_api_key): - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.debug = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = { - "sourceIdentifier": "id", - "profile": "InheritanceOnly", - "found": True, - "relationships": [], - } - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_context = MagicMock() - mock_context.client = mock_client - mock_context.base_url = "https://app.codealive.ai" - ctx.request_context.lifespan_context = mock_context - - await get_artifact_relationships( - ctx=ctx, - identifier="id", - profile="inheritanceOnly", - ) - - call_args = mock_client.post.call_args - assert call_args[1]["json"]["profile"] == "InheritanceOnly" - # No data_source supplied => omitted from the body. - assert "dataSource" not in call_args[1]["json"] - - @pytest.mark.asyncio - @patch("tools.artifact_relationships.get_api_key_from_context") - async def test_forwards_data_source(self, mock_get_api_key): - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.debug = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = { - "sourceIdentifier": "id", - "profile": "CallsOnly", - "found": True, - "relationships": [], - } - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_context = MagicMock() - mock_context.client = mock_client - mock_context.base_url = "https://app.codealive.ai" - ctx.request_context.lifespan_context = mock_context - - await get_artifact_relationships( - ctx=ctx, - identifier="id", - data_source="backend", - ) - - assert mock_client.post.call_args[1]["json"]["dataSource"] == "backend" - - @pytest.mark.asyncio - @patch("tools.artifact_relationships.get_api_key_from_context") - async def test_whitespace_data_source_omitted(self, mock_get_api_key): - """A whitespace-only data_source normalizes to None: not sent to the backend - and not echoed in the not-found hint (preserves the 409-on-ambiguity fallback).""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.debug = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = { - "sourceIdentifier": "id", - "profile": "CallsOnly", - "found": False, - } - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_context = MagicMock() - mock_context.client = mock_client - mock_context.base_url = "https://app.codealive.ai" - ctx.request_context.lifespan_context = mock_context - - result = await get_artifact_relationships( - ctx=ctx, - identifier="id", - data_source=" ", - ) - - assert "dataSource" not in mock_client.post.call_args[1]["json"] - # The confusing `... in data source " "` hint must not appear. - assert '" "' not in result["hint"] - - @pytest.mark.asyncio - @patch("tools.artifact_relationships.get_api_key_from_context") - async def test_ambiguous_409_surfaces_candidate_data_sources(self, mock_get_api_key): - import httpx - - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.debug = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.status_code = 409 - mock_response.text = ( - '{"detail": "Identifier matches 2 data sources: ' - "Name='backend' Id='ds-main', Name='backend-legacy' Id='ds-master'\"}" - ) - mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( - "Conflict", request=MagicMock(), response=mock_response - ) - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_context = MagicMock() - mock_context.client = mock_client - mock_context.base_url = "https://app.codealive.ai" - ctx.request_context.lifespan_context = mock_context - - with pytest.raises(ToolError) as exc: - await get_artifact_relationships(ctx=ctx, identifier="org/repo::path::Symbol") - - message = str(exc.value) - assert "409" in message - # The candidate data sources from the backend 409 must be surfaced, plus the data_source retry hint. - assert "backend" in message and "backend-legacy" in message - assert "data_source" in message - - @pytest.mark.asyncio - async def test_empty_identifier_raises_tool_error(self): - ctx = MagicMock(spec=Context) - with pytest.raises(ToolError, match="required"): - await get_artifact_relationships(ctx=ctx, identifier="") - - @pytest.mark.asyncio - async def test_unsupported_profile_raises_tool_error(self): - ctx = MagicMock(spec=Context) - with pytest.raises(ToolError, match="Unsupported profile"): - await get_artifact_relationships( - ctx=ctx, identifier="id", profile="invalidProfile" - ) - - @pytest.mark.asyncio - @patch("tools.artifact_relationships.get_api_key_from_context") - async def test_api_error_returns_error_json(self, mock_get_api_key): - import httpx - - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.debug = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.status_code = 401 - mock_response.text = "Unauthorized" - mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( - "Unauthorized", request=MagicMock(), response=mock_response - ) - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_context = MagicMock() - mock_context.client = mock_client - mock_context.base_url = "https://app.codealive.ai" - ctx.request_context.lifespan_context = mock_context - - with pytest.raises(ToolError, match="401"): - await get_artifact_relationships(ctx=ctx, identifier="id") - - @pytest.mark.asyncio - @patch("tools.artifact_relationships.get_api_key_from_context") - async def test_not_found_response_renders_correctly(self, mock_get_api_key): - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.debug = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = { - "sourceIdentifier": "org/repo::path::Missing", - "profile": "CallsOnly", - "found": False, - "relationships": [], - } - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_context = MagicMock() - mock_context.client = mock_client - mock_context.base_url = "https://app.codealive.ai" - ctx.request_context.lifespan_context = mock_context - - data = await get_artifact_relationships(ctx=ctx, identifier="org/repo::path::Missing") - - assert data["found"] is False - assert "relationships" not in data - - def test_not_found_hint_with_data_source_suggests_retry_or_omit(self): - payload = _build_relationships_dict( - {"sourceIdentifier": "org/repo::path::S", "profile": "CallsOnly", "found": False}, - data_source="backend", - ) - hint = payload["hint"] - assert "backend" in hint - assert "data_source" in hint - assert "omit" in hint.lower() - - def test_not_found_hint_without_data_source_is_generic(self): - payload = _build_relationships_dict( - {"sourceIdentifier": "org/repo::path::S", "profile": "CallsOnly", "found": False}, - ) - hint = payload["hint"] - assert "data_source" not in hint - assert "fresh identifier" in hint diff --git a/src/tests/test_chat_tool.py b/src/tests/test_chat_tool.py deleted file mode 100644 index 50c6f30..0000000 --- a/src/tests/test_chat_tool.py +++ /dev/null @@ -1,306 +0,0 @@ -"""Test suite for chat tool and legacy consultant alias.""" - -import pytest -from unittest.mock import AsyncMock, MagicMock, patch -import json -from fastmcp import Context -from fastmcp.exceptions import ToolError -from tools.chat import chat, codebase_consultant - - -@pytest.mark.asyncio -@patch('tools.chat.get_api_key_from_context') -async def test_chat_with_simple_names(mock_get_api_key): - """Test chat with simple string names.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - # Mock streaming response - mock_response = MagicMock() - mock_response.raise_for_status = MagicMock() - - # Simulate SSE streaming response - async def mock_aiter_lines(): - yield 'data: {"choices":[{"delta":{"content":"Hello"}}]}' - yield 'data: {"choices":[{"delta":{"content":" world"}}]}' - yield 'data: [DONE]' - - mock_response.aiter_lines = mock_aiter_lines - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - - # Test with simple string names - result = await chat( - ctx=ctx, - question="Test question", - data_sources=["repo123", "repo456"] - ) - - # Verify the API was called with correct format - call_args = mock_client.post.call_args - request_data = call_args.kwargs["json"] - - # Should convert simple names to the backend names array - assert request_data["names"] == [ - "repo123", - "repo456" - ] - - assert result == "Hello world" - assert call_args.kwargs["headers"]["Accept"] == "text/event-stream, application/problem+json" - assert call_args.kwargs["headers"]["X-CodeAlive-Tool"] == "chat" - - -@pytest.mark.asyncio -@patch('tools.chat.get_api_key_from_context') -async def test_consultant_alias_preserves_string_names(mock_get_api_key): - """Test deprecated consultant alias preserves behavior.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.raise_for_status = MagicMock() - - async def mock_aiter_lines(): - yield 'data: {"choices":[{"delta":{"content":"Response"}}]}' - yield 'data: [DONE]' - - mock_response.aiter_lines = mock_aiter_lines - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - - # Test with string names - result = await codebase_consultant( - ctx=ctx, - question="Test", - data_sources=["repo123", "repo456"] - ) - - call_args = mock_client.post.call_args - request_data = call_args.kwargs["json"] - - # Should extract just the normalized names - assert request_data["names"] == [ - "repo123", - "repo456" - ] - - assert result == "Response" - - -@pytest.mark.asyncio -@patch('tools.chat.get_api_key_from_context') -async def test_chat_with_conversation_id(mock_get_api_key): - """Test chat with existing conversation ID.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - - mock_response = MagicMock() - mock_response.raise_for_status = MagicMock() - - async def mock_aiter_lines(): - yield 'data: {"choices":[{"delta":{"content":"Continued"}}]}' - yield 'data: [DONE]' - - mock_response.aiter_lines = mock_aiter_lines - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - - result = await chat( - ctx=ctx, - question="Follow up", - conversation_id="69fceb3e7b2a6a7efdd18180" - ) - - call_args = mock_client.post.call_args - request_data = call_args.kwargs["json"] - - # Should include conversation ID - assert request_data["conversationId"] == "69fceb3e7b2a6a7efdd18180" - # Should not have explicit names when continuing conversation - assert "names" not in request_data - assert result == "Continued" - - -@pytest.mark.asyncio -@patch('tools.chat.get_api_key_from_context') -async def test_chat_rejects_non_objectid_conversation_id(mock_get_api_key): - """Invalid continuation IDs fail locally with an actionable ToolError.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - with pytest.raises(ToolError) as exc: - await chat( - ctx=ctx, - question="Follow up", - conversation_id="conv_123", - ) - - msg = str(exc.value) - assert "24-character hex Mongo ObjectId" in msg - assert "Retry: no" in msg - - -@pytest.mark.asyncio -@patch('tools.chat.get_api_key_from_context') -async def test_chat_named_sse_error_raises_tool_error(mock_get_api_key): - """RFC 9457 `event: error` frames must not collapse to an empty answer.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.raise_for_status = MagicMock() - - async def mock_aiter_lines(): - yield 'event: error' - yield 'data: {"title":"Bad request","status":400,"detail":"Message content violates our content policy","requestId":"req-1"}' - yield '' - - mock_response.aiter_lines = mock_aiter_lines - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - - with pytest.raises(ToolError) as exc: - await chat(ctx=ctx, question="Test question", data_sources=["repo123"]) - - msg = str(exc.value) - assert "Message content violates our content policy" in msg - assert "Code: 400" in msg - assert "Retry: no" in msg - assert "requestId=req-1" in msg - - -@pytest.mark.asyncio -@patch('tools.chat.get_api_key_from_context') -async def test_chat_named_sse_rate_limit_error_is_retryable(mock_get_api_key): - """429 ProblemDetails frames should tell agents to back off, not fix input.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.raise_for_status = MagicMock() - - async def mock_aiter_lines(): - yield 'event: error' - yield 'data: {"title":"Plan limit","status":429,"detail":"Chat completion rate limit exceeded","requestId":"req-429"}' - yield '' - - mock_response.aiter_lines = mock_aiter_lines - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - - with pytest.raises(ToolError) as exc: - await chat(ctx=ctx, question="Test question", data_sources=["repo123"]) - - msg = str(exc.value) - assert "Chat completion rate limit exceeded" in msg - assert "Retry: yes" in msg - assert "back off" in msg - assert "requestId=req-429" in msg - - -@pytest.mark.asyncio -@patch('tools.chat.get_api_key_from_context') -async def test_chat_empty_question_validation(mock_get_api_key): - """Test validation of empty question.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.request_context.lifespan_context = MagicMock() - - # Test with empty question - with pytest.raises(ToolError, match="No question provided"): - await chat(ctx=ctx, question="") - - # Test with whitespace only - with pytest.raises(ToolError, match="No question provided"): - await chat(ctx=ctx, question=" ") - - - - -@pytest.mark.asyncio -@patch('tools.chat.get_api_key_from_context') -@patch('tools.chat.handle_api_error') -async def test_chat_error_handling(mock_handle_error, mock_get_api_key): - """Test error handling in chat — handle_api_error raises ToolError.""" - mock_get_api_key.return_value = "test_key" - mock_handle_error.side_effect = ToolError("Error: Authentication failed") - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - - mock_client = AsyncMock() - mock_client.post.side_effect = Exception("Network error") - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - - with pytest.raises(ToolError, match="Authentication failed"): - await chat( - ctx=ctx, - question="Test", - data_sources=["repo123"] - ) - - mock_handle_error.assert_called_once() diff --git a/src/tests/test_datasources.py b/src/tests/test_datasources.py deleted file mode 100644 index c79b640..0000000 --- a/src/tests/test_datasources.py +++ /dev/null @@ -1,356 +0,0 @@ -"""Tests for data sources tool.""" - -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -from fastmcp import Context - -from tools.datasources import get_data_sources - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_removes_repository_ids_from_workspaces(mock_get_api_key): - """Test that repositoryIds are removed from workspace data sources.""" - mock_get_api_key.return_value = "test-key" - - # Mock context - mock_ctx = MagicMock(spec=Context) - mock_ctx.info = AsyncMock() - mock_ctx.warning = AsyncMock() - mock_ctx.error = AsyncMock() - - mock_lifespan_context = MagicMock() - mock_lifespan_context.base_url = "https://api.example.com" - - # Mock client with response containing workspaces with repositoryIds - mock_response = MagicMock() - mock_response.json.return_value = [ - { - "id": "repo-1", - "name": "Test Repository", - "type": "Repository", - "url": "https://github.com/example/repo", - "state": "Alive" - }, - { - "id": "workspace-1", - "name": "Test Workspace", - "type": "Workspace", - "repositoryIds": ["repo-1", "repo-2", "repo-3"], - "state": "Alive" - } - ] - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.get = AsyncMock(return_value=mock_response) - mock_lifespan_context.client = mock_client - - mock_ctx.request_context.lifespan_context = mock_lifespan_context - - # Tool returns a dict {"dataSources":[...], "hint":"..."}. - result = await get_data_sources(mock_ctx, alive_only=True) - data_sources = result["dataSources"] - assert "hint" in result - - # Verify repository still has all fields - repo = next(ds for ds in data_sources if ds["type"] == "Repository") - assert repo["id"] == "repo-1" - assert repo["name"] == "Test Repository" - assert repo["url"] == "https://github.com/example/repo" - assert "repositoryIds" not in repo - - # Verify workspace has repositoryIds removed - workspace = next(ds for ds in data_sources if ds["type"] == "Workspace") - assert workspace["id"] == "workspace-1" - assert workspace["name"] == "Test Workspace" - assert "repositoryIds" not in workspace, "repositoryIds should be removed from workspace" - - # Verify API was called correctly. Headers include CodeAlive integration - # markers added on every request, so assert on the relevant subset. - mock_client.get.assert_called_once() - call_args = mock_client.get.call_args - assert call_args.args[0] == "/api/datasources/ready" - headers = call_args.kwargs["headers"] - assert headers["Authorization"] == "Bearer test-key" - assert headers["X-CodeAlive-Tool"] == "get_data_sources" - assert headers["X-CodeAlive-Integration"] == "mcp" - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_preserves_other_workspace_fields(mock_get_api_key): - """Test that other workspace fields are preserved when removing repositoryIds.""" - mock_get_api_key.return_value = "test-key" - - # Mock context - mock_ctx = MagicMock(spec=Context) - mock_ctx.info = AsyncMock() - mock_ctx.warning = AsyncMock() - mock_ctx.error = AsyncMock() - - mock_lifespan_context = MagicMock() - mock_lifespan_context.base_url = "https://api.example.com" - - # Mock client with workspace containing various fields - mock_response = MagicMock() - mock_response.json.return_value = [ - { - "id": "workspace-1", - "name": "Test Workspace", - "type": "Workspace", - "state": "Alive", - "repositoryIds": ["repo-1", "repo-2"], - "customField": "custom-value", - "createdAt": "2025-01-01T00:00:00Z" - } - ] - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.get = AsyncMock(return_value=mock_response) - mock_lifespan_context.client = mock_client - - mock_ctx.request_context.lifespan_context = mock_lifespan_context - - result = await get_data_sources(mock_ctx, alive_only=True) - data_sources = result["dataSources"] - - workspace = data_sources[0] - - # Verify repositoryIds removed but other fields preserved - assert "repositoryIds" not in workspace - assert workspace["id"] == "workspace-1" - assert workspace["name"] == "Test Workspace" - assert workspace["type"] == "Workspace" - assert workspace["state"] == "Alive" - assert workspace["customField"] == "custom-value" - assert workspace["createdAt"] == "2025-01-01T00:00:00Z" - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_handles_missing_repository_ids(mock_get_api_key): - """Test that function handles workspaces without repositoryIds field.""" - mock_get_api_key.return_value = "test-key" - - # Mock context - mock_ctx = MagicMock(spec=Context) - mock_ctx.info = AsyncMock() - mock_ctx.warning = AsyncMock() - mock_ctx.error = AsyncMock() - - mock_lifespan_context = MagicMock() - mock_lifespan_context.base_url = "https://api.example.com" - - # Mock client with workspace without repositoryIds - mock_response = MagicMock() - mock_response.json.return_value = [ - { - "id": "workspace-1", - "name": "Test Workspace", - "type": "Workspace", - "state": "Alive" - } - ] - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.get = AsyncMock(return_value=mock_response) - mock_lifespan_context.client = mock_client - - mock_ctx.request_context.lifespan_context = mock_lifespan_context - - # Should not raise an error - result = await get_data_sources(mock_ctx, alive_only=True) - data_sources = result["dataSources"] - - # Verify workspace is intact - workspace = data_sources[0] - assert workspace["id"] == "workspace-1" - assert workspace["name"] == "Test Workspace" - assert "repositoryIds" not in workspace - - -def _ctx_with_response(json_return, headers=None): - """Builds a mocked Context whose client.get returns a response with the given JSON body.""" - mock_ctx = MagicMock(spec=Context) - mock_ctx.info = AsyncMock() - mock_ctx.warning = AsyncMock() - mock_ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = json_return - mock_response.headers = headers or {} - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.get = AsyncMock(return_value=mock_response) - - mock_lifespan_context = MagicMock() - mock_lifespan_context.base_url = "https://api.example.com" - mock_lifespan_context.client = mock_client - mock_ctx.request_context.lifespan_context = mock_lifespan_context - return mock_ctx, mock_client - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_with_query_passes_query_param(mock_get_api_key): - """When a query is supplied, it is forwarded to the listing endpoint as the `query` param.""" - mock_get_api_key.return_value = "test-key" - mock_ctx, mock_client = _ctx_with_response([ - {"id": "repo-1", "name": "Repo", "type": "Repository", "relevanceReason": "handles OAuth"}, - ]) - - await get_data_sources(mock_ctx, alive_only=True, query="add OAuth to checkout") - - call_args = mock_client.get.call_args - assert call_args.args[0] == "/api/datasources/ready" - assert call_args.kwargs["params"] == {"query": "add OAuth to checkout"} - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_without_query_sends_no_query_param(mock_get_api_key): - """Without a query, no `query` param is sent (legacy behavior unchanged).""" - mock_get_api_key.return_value = "test-key" - mock_ctx, mock_client = _ctx_with_response([ - {"id": "repo-1", "name": "Repo", "type": "Repository"}, - ]) - - await get_data_sources(mock_ctx, alive_only=True) - - call_args = mock_client.get.call_args - assert call_args.kwargs.get("params") is None - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_surfaces_relevance_reason(mock_get_api_key): - """relevanceReason is preserved per item for the client (wrapped shape when query is set).""" - mock_get_api_key.return_value = "test-key" - mock_ctx, _ = _ctx_with_response([ - {"id": "repo-1", "name": "Repo", "type": "Repository", "relevanceReason": "implements the checkout flow"}, - ]) - - result = await get_data_sources(mock_ctx, alive_only=True, query="checkout") - - payload = result - assert payload["dataSources"][0]["relevanceReason"] == "implements the checkout flow" - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_filtered_hint_reports_total_and_omitted(mock_get_api_key): - """Filtered success surfaces how many sources exist beyond the shown subset and how to get them.""" - mock_get_api_key.return_value = "test-key" - mock_ctx, _ = _ctx_with_response( - [{"id": "repo-1", "name": "Repo", "type": "Repository", "relevanceReason": "checkout flow"}], - headers={"X-CodeAlive-Total-Data-Sources": "25"}, - ) - - result = await get_data_sources(mock_ctx, alive_only=True, query="checkout") - - payload = result - assert len(payload["dataSources"]) == 1 - assert "1 of 25" in payload["message"] - assert "omitted" in payload["message"].lower() - assert "without a query" in payload["message"].lower() - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_filtered_hint_without_total_header(mock_get_api_key): - """Filtered success without the total header still hints that sources were omitted.""" - mock_get_api_key.return_value = "test-key" - mock_ctx, _ = _ctx_with_response( - [{"id": "repo-1", "name": "Repo", "type": "Repository", "relevanceReason": "checkout flow"}], - ) - - result = await get_data_sources(mock_ctx, alive_only=True, query="checkout") - - payload = result - assert "omitted" in payload["message"].lower() - assert "without a query" in payload["message"].lower() - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_filtered_hint_with_malformed_total_header(mock_get_api_key): - """A malformed total header is treated as absent rather than raising.""" - mock_get_api_key.return_value = "test-key" - mock_ctx, _ = _ctx_with_response( - [{"id": "repo-1", "name": "Repo", "type": "Repository", "relevanceReason": "checkout flow"}], - headers={"X-CodeAlive-Total-Data-Sources": "not-a-number"}, - ) - - result = await get_data_sources(mock_ctx, alive_only=True, query="checkout") - - payload = result - assert "omitted" in payload["message"].lower() - assert "without a query" in payload["message"].lower() - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_all_relevant_hint_reports_no_omission(mock_get_api_key): - """When every available source is relevant, the hint says so instead of claiming omissions.""" - mock_get_api_key.return_value = "test-key" - mock_ctx, _ = _ctx_with_response( - [{"id": "repo-1", "name": "Repo", "type": "Repository", "relevanceReason": "checkout flow"}], - headers={"X-CodeAlive-Total-Data-Sources": "1"}, - ) - - result = await get_data_sources(mock_ctx, alive_only=True, query="checkout") - - payload = result - assert "all 1" in payload["message"].lower() - assert "omitted" not in payload["message"].lower() - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_failopen_hint_when_no_reasons_present(mock_get_api_key): - """Query supplied but no item carries relevanceReason → the filter did not run (fail-open, - disabled, or an older backend); the hint must say the FULL list is returned.""" - mock_get_api_key.return_value = "test-key" - mock_ctx, _ = _ctx_with_response([ - {"id": "repo-1", "name": "Repo", "type": "Repository"}, - {"id": "repo-2", "name": "Other", "type": "Repository"}, - ]) - - result = await get_data_sources(mock_ctx, alive_only=True, query="checkout") - - payload = result - assert len(payload["dataSources"]) == 2 - assert "unavailable" in payload["message"].lower() - assert "full" in payload["message"].lower() - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_empty_with_query_returns_no_relevant_hint(mock_get_api_key): - """Empty result WITH a query returns a 'no relevant' hint, not 'add a repository'.""" - mock_get_api_key.return_value = "test-key" - mock_ctx, _ = _ctx_with_response([]) - - result = await get_data_sources(mock_ctx, alive_only=True, query="something unrelated") - - assert result["dataSources"] == [] - assert "relevant" in result["hint"].lower() - assert "add a repository" not in result["hint"].lower() - - -@pytest.mark.asyncio -@patch('tools.datasources.get_api_key_from_context') -async def test_get_data_sources_empty_without_query_keeps_add_repository_hint(mock_get_api_key): - """Empty result WITHOUT a query keeps the existing 'add a repository' hint.""" - mock_get_api_key.return_value = "test-key" - mock_ctx, _ = _ctx_with_response([]) - - result = await get_data_sources(mock_ctx, alive_only=True) - - assert result["dataSources"] == [] - assert "add a repository" in result["hint"].lower() \ No newline at end of file diff --git a/src/tests/test_e2e_tools.py b/src/tests/test_e2e_tools.py deleted file mode 100644 index 6f1cb6a..0000000 --- a/src/tests/test_e2e_tools.py +++ /dev/null @@ -1,1604 +0,0 @@ -"""End-to-end tests for MCP tools using FastMCP's built-in Client. - -Each test builds a FastMCP server with the real tool functions, a custom -lifespan backed by httpx.MockTransport, and exercises the tool through -the in-memory MCP transport — covering argument validation, HTTP -call dispatch, response parsing, and XML/text formatting in a single pass. -""" - -import json -import sys -from contextlib import asynccontextmanager -from pathlib import Path -from typing import AsyncIterator - -import httpx -import pytest -from fastmcp import Client, FastMCP -from loguru import logger - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from core import CodeAliveContext -from middleware.observability_middleware import ObservabilityMiddleware -from tools import ( - chat, - codebase_consultant, - codebase_search, - fetch_artifacts, - grep_search, - get_artifact_relationships, - get_data_sources, - semantic_search, -) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _mock_transport(routes: dict) -> httpx.MockTransport: - """httpx MockTransport dispatching by URL path. - - ``routes`` maps a URL path (e.g. "/api/search") to a callable - ``(httpx.Request) -> httpx.Response``. - """ - def handler(request: httpx.Request) -> httpx.Response: - for path, responder in routes.items(): - if request.url.path == path: - return responder(request) - return httpx.Response(404, json={"error": f"no mock for {request.url.path}"}) - return httpx.MockTransport(handler) - - -def _server(routes: dict) -> FastMCP: - """Build a FastMCP instance wired to mock HTTP routes.""" - - @asynccontextmanager - async def lifespan(server: FastMCP) -> AsyncIterator[CodeAliveContext]: - transport = _mock_transport(routes) - async with httpx.AsyncClient( - transport=transport, base_url="https://test.codealive.ai" - ) as client: - yield CodeAliveContext( - client=client, - api_key="", - base_url="https://test.codealive.ai", - ) - - mcp = FastMCP("E2E Test Server", lifespan=lifespan) - mcp.tool()(get_data_sources) - mcp.tool()(codebase_search) - mcp.tool()(semantic_search) - mcp.tool()(grep_search) - mcp.tool()(fetch_artifacts) - mcp.tool()(chat) - mcp.tool()(codebase_consultant) - mcp.tool()(get_artifact_relationships) - return mcp - - -def _text(result) -> str: - """Extract first text block from a CallToolResult.""" - return result.content[0].text - - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture(autouse=True) -def _api_key_env(monkeypatch): - """Provide CODEALIVE_API_KEY so get_api_key_from_context falls back to it.""" - monkeypatch.setenv("CODEALIVE_API_KEY", "test-e2e-key") - - -# --------------------------------------------------------------------------- -# get_data_sources -# --------------------------------------------------------------------------- - -class TestGetDataSourcesE2E: - @pytest.mark.asyncio - async def test_returns_compact_json(self): - payload = [ - {"id": "r1", "name": "backend", "type": "Repository", "url": "https://github.com/org/backend"}, - {"id": "w1", "name": "platform", "type": "Workspace", "repositoryIds": ["r1", "r2"]}, - ] - - def handler(req): - assert req.headers["authorization"] == "Bearer test-e2e-key" - return httpx.Response(200, json=payload) - - mcp = _server({"/api/datasources/ready": handler}) - async with Client(mcp) as client: - result = await client.call_tool("get_data_sources", {}) - - text = _text(result) - data = json.loads(text) - # Compact JSON, UTF-8 preserved (FastMCP uses pydantic_core.to_json). - assert text == json.dumps(data, separators=(",", ":"), ensure_ascii=False) - names = [ds["name"] for ds in data["dataSources"]] - assert "backend" in names - assert "platform" in names - # repositoryIds must be stripped from workspaces - for ds in data["dataSources"]: - assert "repositoryIds" not in ds - # Always emit a follow-up hint pointing at search/chat tools. - assert "semantic_search" in data["hint"] - - @pytest.mark.asyncio - async def test_empty_list_returns_recovery_hint(self): - mcp = _server({"/api/datasources/ready": lambda r: httpx.Response(200, json=[])}) - async with Client(mcp) as client: - result = await client.call_tool("get_data_sources", {}) - - text = _text(result) - data = json.loads(text) - assert data["dataSources"] == [] - assert "No data sources found" in data["hint"] - - @pytest.mark.asyncio - async def test_unicode_preserved_in_response(self): - """Cyrillic in name/description must survive as UTF-8, not \\uXXXX.""" - payload = [ - {"id": "r1", "name": "кирилл-репо", "type": "Repository", - "description": "Описание про принтеры HPRT"}, - ] - - mcp = _server({"/api/datasources/ready": lambda r: httpx.Response(200, json=payload)}) - async with Client(mcp) as client: - result = await client.call_tool("get_data_sources", {}) - - text = _text(result) - # Round-trip via ensure_ascii=False — ASCII-escaped output would mismatch. - assert text == json.dumps(json.loads(text), separators=(",", ":"), ensure_ascii=False) - data = json.loads(text) - assert data["dataSources"][0]["name"] == "кирилл-репо" - assert data["dataSources"][0]["description"] == "Описание про принтеры HPRT" - assert "\\u04" not in text - - @pytest.mark.asyncio - async def test_alive_only_false_hits_all_endpoint(self): - hit = [] - - def handler_all(req): - hit.append("all") - return httpx.Response(200, json=[{"id": "1", "name": "r", "type": "Repository"}]) - - mcp = _server({ - "/api/datasources/all": handler_all, - "/api/datasources/ready": lambda r: httpx.Response(200, json=[]), - }) - async with Client(mcp) as client: - await client.call_tool("get_data_sources", {"alive_only": False}) - - assert "all" in hit - - @pytest.mark.asyncio - async def test_backend_500_returns_error(self): - mcp = _server({ - "/api/datasources/ready": lambda r: httpx.Response(500, text="boom"), - }) - async with Client(mcp) as client: - result = await client.call_tool("get_data_sources", {}, raise_on_error=False) - - text = _text(result) - assert result.is_error - assert "500" in text or "Server error" in text - - -# --------------------------------------------------------------------------- -# codebase_search -# --------------------------------------------------------------------------- - -class TestCodebaseSearchE2E: - _SEARCH_RESPONSE = { - "results": [ - { - "identifier": "org/repo::src/auth.py::AuthService", - "kind": "Class", - "description": "Handles authentication", - "contentByteSize": 4200, - "location": { - "path": "src/auth.py", - "range": {"start": {"line": 10}, "end": {"line": 85}}, - }, - } - ] - } - - @pytest.mark.asyncio - async def test_success_returns_compact_json(self): - def handler(req): - assert req.url.params.get("Query") == "auth service" - assert req.url.params.get("Mode") == "auto" - assert "X-CodeAlive-Tool" in req.headers - return httpx.Response(200, json=self._SEARCH_RESPONSE) - - mcp = _server({"/api/search": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "codebase_search", - {"query": "auth service", "data_sources": ["backend"]}, - ) - - text = _text(result) - data = json.loads(text) - # Compact JSON with Unicode preserved (pydantic_core.to_json keeps UTF-8) - assert text == json.dumps(data, separators=(",", ":"), ensure_ascii=False) - assert data["results"][0]["path"] == "src/auth.py" - assert "AuthService" in data["results"][0]["identifier"] - # Hint must always be present and instruct the agent to fetch real content - assert "fetch_artifacts" in data["hint"] - - @pytest.mark.asyncio - async def test_empty_query_returns_error(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "codebase_search", {"query": ""}, - raise_on_error=False, - ) - - text = _text(result) - assert result.is_error - assert "empty" in text.lower() or "Query cannot be empty" in text - - @pytest.mark.asyncio - async def test_no_results_returns_empty_json(self): - mcp = _server({ - "/api/search": lambda r: httpx.Response(200, json={"results": []}), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "codebase_search", {"query": "nonexistent"}, - ) - - text = _text(result) - data = json.loads(text) - assert data["results"] == [] - assert "grep_search" in data["hint"] - assert "get_data_sources" in data["hint"] - assert "fetch_artifacts" not in data["hint"] - - @pytest.mark.asyncio - async def test_deep_mode_forwarded(self): - received_mode = [] - - def handler(req): - received_mode.append(req.url.params.get("Mode")) - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search": handler}) - async with Client(mcp) as client: - await client.call_tool( - "codebase_search", {"query": "x", "mode": "deep"}, - ) - - assert received_mode == ["deep"] - - @pytest.mark.asyncio - async def test_404_returns_not_found_error(self): - mcp = _server({ - "/api/search": lambda r: httpx.Response(404, text="not found"), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "codebase_search", {"query": "x"}, - raise_on_error=False, - ) - - text = _text(result) - assert result.is_error - assert "404" in text or "not found" in text.lower() - - -# --------------------------------------------------------------------------- -# semantic_search -# --------------------------------------------------------------------------- - -class TestSemanticSearchE2E: - @pytest.mark.asyncio - async def test_success_hits_canonical_endpoint(self): - def handler(req): - assert req.url.params.get("Query") == "auth service" - assert req.url.params.get("MaxResults") == "7" - assert req.url.params.get_list("Names") == ["backend"] - assert req.url.params.get_list("Paths") == ["src/auth.py"] - assert req.url.params.get_list("Extensions") == [".py"] - assert req.headers["X-CodeAlive-Tool"] == "semantic_search" - return httpx.Response( - 200, - json={ - "results": [ - { - "identifier": "org/repo::src/auth.py::AuthService", - "kind": "Class", - "description": "Handles authentication", - "contentByteSize": 4200, - "location": { - "path": "src/auth.py", - "range": {"start": {"line": 10}, "end": {"line": 85}}, - }, - } - ] - }, - ) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "semantic_search", - { - "query": "auth service", - "data_sources": ["backend"], - "paths": ["src/auth.py"], - "extensions": [".py"], - "max_results": 7, - }, - ) - - data = json.loads(_text(result)) - assert data["results"][0]["path"] == "src/auth.py" - assert "fetch_artifacts" in data["hint"] - - @pytest.mark.asyncio - async def test_max_results_forwarded(self): - def handler(req): - assert req.url.params.get("MaxResults") == "3" - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "max_results": 3}, - ) - - @pytest.mark.asyncio - async def test_max_results_not_sent_when_omitted(self): - def handler(req): - assert "MaxResults" not in dict(req.url.params) - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"]}, - ) - - @pytest.mark.asyncio - async def test_max_results_boundary_0_rejected(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "max_results": 0}, - raise_on_error=False, - ) - assert result.is_error - assert "max_results" in _text(result) - - @pytest.mark.asyncio - async def test_max_results_boundary_501_rejected(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "max_results": 501}, - raise_on_error=False, - ) - assert result.is_error - assert "max_results" in _text(result) - - @pytest.mark.asyncio - async def test_max_results_boundary_500_accepted(self): - def handler(req): - assert req.url.params.get("MaxResults") == "500" - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "max_results": 500}, - ) - - @pytest.mark.asyncio - async def test_max_results_boundary_1_accepted(self): - def handler(req): - assert req.url.params.get("MaxResults") == "1" - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "max_results": 1}, - ) - - @pytest.mark.asyncio - async def test_extensions_forwarded(self): - def handler(req): - assert req.url.params.get_list("Extensions") == [".cs", ".py"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "extensions": [".cs", ".py"]}, - ) - - @pytest.mark.asyncio - async def test_paths_forwarded(self): - def handler(req): - assert req.url.params.get_list("Paths") == ["src/services", "src/domain"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "paths": ["src/services", "src/domain"]}, - ) - - @pytest.mark.asyncio - async def test_multiple_data_sources_forwarded(self): - def handler(req): - assert req.url.params.get_list("Names") == ["repo-a", "repo-b"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo-a", "repo-b"]}, - ) - - @pytest.mark.asyncio - async def test_all_filters_combined(self): - def handler(req): - assert req.url.params.get("Query") == "pattern" - assert req.url.params.get("MaxResults") == "10" - assert req.url.params.get_list("Names") == ["backend"] - assert req.url.params.get_list("Paths") == ["src/domain"] - assert req.url.params.get_list("Extensions") == [".cs"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - { - "query": "pattern", - "data_sources": ["backend"], - "paths": ["src/domain"], - "extensions": [".cs"], - "max_results": 10, - }, - ) - - @pytest.mark.asyncio - async def test_empty_data_sources_omits_names(self): - def handler(req): - assert "Names" not in dict(req.url.params) - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": []}, - ) - - @pytest.mark.asyncio - async def test_data_sources_as_string_normalized(self): - def handler(req): - assert req.url.params.get_list("Names") == ["my-repo"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": "my-repo"}, - ) - - @pytest.mark.asyncio - async def test_404_includes_recovery_hint(self): - mcp = _server({ - "/api/search/semantic": lambda r: httpx.Response(404, text="not found"), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["bad-repo"]}, - raise_on_error=False, - ) - text = _text(result) - assert result.is_error - assert "get_data_sources" in text - - @pytest.mark.asyncio - async def test_unicode_preserved_in_response(self): - """Cyrillic in path/description must survive as UTF-8, not \\uXXXX.""" - payload = { - "results": [ - { - "kind": "File", - "identifier": "org/repo::база/file.md::", - "description": "Описание про принтеры HPRT", - "location": {"path": "база/file.md"}, - "contentByteSize": 100, - } - ] - } - - mcp = _server({"/api/search/semantic": lambda r: httpx.Response(200, json=payload)}) - async with Client(mcp) as client: - result = await client.call_tool( - "semantic_search", {"query": "кир", "data_sources": ["repo"]}, - ) - - text = _text(result) - assert text == json.dumps(json.loads(text), separators=(",", ":"), ensure_ascii=False) - assert "база/file.md" in text - assert "\\u04" not in text - - -# --------------------------------------------------------------------------- -# grep_search -# --------------------------------------------------------------------------- - -class TestGrepSearchE2E: - @pytest.mark.asyncio - async def test_success_hits_canonical_endpoint(self): - def handler(req): - assert req.url.params.get("Query") == "auth\\(" - assert req.url.params.get("Regex") == "true" - assert req.headers["X-CodeAlive-Tool"] == "grep_search" - return httpx.Response( - 200, - json={ - "results": [ - { - "identifier": "org/repo::src/auth.py", - "kind": "File", - "matchCount": 2, - "matches": [ - { - "lineNumber": 15, - "startColumn": 5, - "endColumn": 10, - "lineText": "auth(token)", - } - ], - "location": { - "path": "src/auth.py", - "range": {"start": {"line": 15}, "end": {"line": 15}}, - }, - } - ] - }, - ) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "grep_search", - {"query": "auth\\(", "data_sources": ["backend"], "regex": True}, - ) - - data = json.loads(_text(result)) - assert data["results"][0]["matchCount"] == 2 - assert data["results"][0]["matches"][0]["lineNumber"] == 15 - assert "fetch_artifacts" in data["hint"] or "Read()" in data["hint"] - - @pytest.mark.asyncio - async def test_regex_false_forwarded(self): - def handler(req): - assert req.url.params.get("Regex") == "false" - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - await client.call_tool( - "grep_search", - {"query": "literal string", "data_sources": ["repo"], "regex": False}, - ) - - @pytest.mark.asyncio - async def test_regex_default_is_false(self): - def handler(req): - assert req.url.params.get("Regex") == "false" - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - await client.call_tool( - "grep_search", - {"query": "literal string", "data_sources": ["repo"]}, - ) - - @pytest.mark.asyncio - async def test_max_results_forwarded(self): - def handler(req): - assert req.url.params.get("MaxResults") == "10" - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - await client.call_tool( - "grep_search", - {"query": "test", "data_sources": ["repo"], "max_results": 10}, - ) - - @pytest.mark.asyncio - async def test_max_results_boundary_0_rejected(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "grep_search", - {"query": "test", "data_sources": ["repo"], "max_results": 0}, - raise_on_error=False, - ) - assert result.is_error - assert "max_results" in _text(result) - - @pytest.mark.asyncio - async def test_max_results_boundary_501_rejected(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "grep_search", - {"query": "test", "data_sources": ["repo"], "max_results": 501}, - raise_on_error=False, - ) - assert result.is_error - assert "max_results" in _text(result) - - @pytest.mark.asyncio - async def test_extensions_forwarded(self): - def handler(req): - assert req.url.params.get_list("Extensions") == [".ts", ".vue"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - await client.call_tool( - "grep_search", - {"query": "test", "data_sources": ["repo"], "extensions": [".ts", ".vue"]}, - ) - - @pytest.mark.asyncio - async def test_paths_forwarded(self): - def handler(req): - assert req.url.params.get_list("Paths") == ["src/controllers"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - await client.call_tool( - "grep_search", - {"query": "test", "data_sources": ["repo"], "paths": ["src/controllers"]}, - ) - - @pytest.mark.asyncio - async def test_all_filters_combined(self): - def handler(req): - assert req.url.params.get("Query") == "Status\\.Alive" - assert req.url.params.get("Regex") == "true" - assert req.url.params.get("MaxResults") == "5" - assert req.url.params.get_list("Names") == ["backend"] - assert req.url.params.get_list("Paths") == ["src/services"] - assert req.url.params.get_list("Extensions") == [".cs"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - await client.call_tool( - "grep_search", - { - "query": "Status\\.Alive", - "data_sources": ["backend"], - "paths": ["src/services"], - "extensions": [".cs"], - "max_results": 5, - "regex": True, - }, - ) - - @pytest.mark.asyncio - async def test_empty_query_returns_error(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "grep_search", - {"query": "", "data_sources": ["repo"]}, - raise_on_error=False, - ) - assert result.is_error - assert "empty" in _text(result).lower() or "Query cannot be empty" in _text(result) - - @pytest.mark.asyncio - async def test_data_sources_as_string_normalized(self): - def handler(req): - assert req.url.params.get_list("Names") == ["my-repo"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - await client.call_tool( - "grep_search", - {"query": "test", "data_sources": "my-repo"}, - ) - - @pytest.mark.asyncio - async def test_404_includes_recovery_hint(self): - mcp = _server({ - "/api/search/grep": lambda r: httpx.Response(404, text="not found"), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "grep_search", - {"query": "test", "data_sources": ["bad-repo"]}, - raise_on_error=False, - ) - assert result.is_error - assert "get_data_sources" in _text(result) - - @pytest.mark.asyncio - async def test_unicode_preserved_in_response(self): - """Cyrillic in path/lineText must survive as UTF-8, not \\uXXXX.""" - payload = { - "results": [ - { - "kind": "File", - "identifier": "org/repo::база/file.md::", - "location": {"path": "база/file.md"}, - "matchCount": 1, - "matches": [{"lineNumber": 3, "startColumn": 1, "endColumn": 5, - "lineText": "тест кириллица"}], - } - ] - } - mcp = _server({"/api/search/grep": lambda r: httpx.Response(200, json=payload)}) - async with Client(mcp) as client: - result = await client.call_tool( - "grep_search", {"query": "кир", "data_sources": ["repo"]}, - ) - - text = _text(result) - assert text == json.dumps(json.loads(text), separators=(",", ":"), ensure_ascii=False) - assert "тест кириллица" in text - assert "\\u04" not in text - - -# --------------------------------------------------------------------------- -# fetch_artifacts -# --------------------------------------------------------------------------- - -class TestFetchArtifactsE2E: - _ARTIFACTS_RESPONSE = { - "artifacts": [ - { - "identifier": "org/repo::src/auth.py::AuthService", - "content": "class AuthService:\n pass\n", - "contentByteSize": 28, - "startLine": 10, - } - ] - } - - @pytest.mark.asyncio - async def test_success_returns_xml_with_content(self): - def handler(req): - body = json.loads(req.content) - assert body["identifiers"] == ["org/repo::src/auth.py::AuthService"] - return httpx.Response(200, json=self._ARTIFACTS_RESPONSE) - - mcp = _server({"/api/search/artifacts": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "fetch_artifacts", - {"identifiers": ["org/repo::src/auth.py::AuthService"]}, - ) - - xml = _text(result) - assert "" in xml - assert "AuthService" in xml - assert "class AuthService" in xml - # Content body sits between newlines inside - assert "\n" in xml - assert "\n " in xml - - @pytest.mark.asyncio - async def test_not_found_surfaced_with_found_flag(self): - payload = { - "artifacts": [ - {"identifier": "org/repo::src/auth.py::AuthService", "found": True, - "content": "class AuthService:\n pass\n", "contentByteSize": 28, "startLine": 10}, - {"identifier": "org/repo::src/missing.py::Gone", "found": False, "content": None}, - ] - } - mcp = _server({"/api/search/artifacts": lambda r: httpx.Response(200, json=payload)}) - async with Client(mcp) as client: - result = await client.call_tool( - "fetch_artifacts", - {"identifiers": ["org/repo::src/auth.py::AuthService", "org/repo::src/missing.py::Gone"]}, - ) - - xml = _text(result) - assert "" in xml - assert "AuthService" in xml - - @pytest.mark.asyncio - async def test_single_string_identifier_coerced(self): - """A bare string identifier should be wrapped into a list.""" - def handler(req): - body = json.loads(req.content) - assert body["identifiers"] == ["org/repo::src/auth.py"] - return httpx.Response(200, json=self._ARTIFACTS_RESPONSE) - - mcp = _server({"/api/search/artifacts": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "fetch_artifacts", - {"identifiers": "org/repo::src/auth.py"}, - ) - - xml = _text(result) - assert "" in xml - - @pytest.mark.asyncio - async def test_unicode_preserved_in_xml(self): - """Cyrillic in identifier and content must survive into the XML output.""" - payload = { - "artifacts": [ - { - "identifier": "org/repo::файл.cs::Класс.Метод", - "content": "класс Привет {\n метод() => 42\n}\n", - "contentByteSize": 100, - "startLine": 1, - } - ] - } - mcp = _server({"/api/search/artifacts": lambda r: httpx.Response(200, json=payload)}) - async with Client(mcp) as client: - result = await client.call_tool( - "fetch_artifacts", - {"identifiers": ["org/repo::файл.cs::Класс.Метод"]}, - ) - - xml = _text(result) - assert "Класс.Метод" in xml - assert "класс Привет" in xml - assert "\\u04" not in xml - - -# --------------------------------------------------------------------------- -# Stringified parameter coercion for search tools -# --------------------------------------------------------------------------- - -class TestSearchStringifiedParamsE2E: - """Verify that search tools accept stringified JSON arrays for list params.""" - - @pytest.mark.asyncio - async def test_semantic_search_stringified_extensions(self): - def handler(req): - assert req.url.params.get_list("Extensions") == [".cs", ".py"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "extensions": '[".cs", ".py"]'}, - ) - - @pytest.mark.asyncio - async def test_semantic_search_stringified_paths(self): - def handler(req): - assert req.url.params.get_list("Paths") == ["src/services", "src/domain"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/semantic": handler}) - async with Client(mcp) as client: - await client.call_tool( - "semantic_search", - {"query": "test", "data_sources": ["repo"], "paths": '["src/services", "src/domain"]'}, - ) - - @pytest.mark.asyncio - async def test_grep_search_stringified_extensions(self): - def handler(req): - assert req.url.params.get_list("Extensions") == [".ts"] - return httpx.Response(200, json={"results": []}) - - mcp = _server({"/api/search/grep": handler}) - async with Client(mcp) as client: - await client.call_tool( - "grep_search", - {"query": "test", "data_sources": ["repo"], "extensions": '[".ts"]'}, - ) - - -# --------------------------------------------------------------------------- -# chat / codebase_consultant (streaming SSE) -# --------------------------------------------------------------------------- - -class TestChatE2E: - @staticmethod - def _sse_body( - chunks: list[str], - conv_id: str = "69fceb3e7b2a6a7efdd18180", - msg_id: str = "69fceb3e7b2a6a7efdd18181", - ) -> str: - """Build an SSE response body with metadata + content chunks + DONE.""" - lines = [ - "event: message", - f'data: {{"event":"metadata","conversationId":"{conv_id}","messageId":"{msg_id}"}}', - "", - ] - for chunk in chunks: - payload = json.dumps({"choices": [{"delta": {"content": chunk}}]}) - lines.append(f"data: {payload}") - lines.append("") - lines.append("data: [DONE]") - lines.append("") - return "\n".join(lines) - - @pytest.mark.asyncio - async def test_streaming_success(self): - body = self._sse_body(["Hello ", "world!"]) - - def handler(req): - data = json.loads(req.content) - assert data["stream"] is True - assert data["messages"][0]["content"] == "How does auth work?" - assert req.headers["accept"] == "text/event-stream, application/problem+json" - return httpx.Response(200, text=body, headers={"content-type": "text/event-stream"}) - - mcp = _server({"/api/chat/completions": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", - {"question": "How does auth work?", "data_sources": ["backend"]}, - ) - - text = _text(result) - assert "Hello world!" in text - # New conversation gets ID appended - assert "69fceb3e7b2a6a7efdd18180" in text - - @pytest.mark.asyncio - async def test_continuing_conversation(self): - conversation_id = "69fceb3e7b2a6a7efdd18180" - body = self._sse_body(["Follow-up answer"], conv_id=conversation_id) - - def handler(req): - data = json.loads(req.content) - assert data["conversationId"] == conversation_id - return httpx.Response(200, text=body, headers={"content-type": "text/event-stream"}) - - mcp = _server({"/api/chat/completions": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", - {"question": "And the error handling?", "conversation_id": conversation_id}, - ) - - text = _text(result) - assert "Follow-up answer" in text - - @pytest.mark.asyncio - async def test_invalid_conversation_id_returns_actionable_tool_error(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", - {"question": "And the error handling?", "conversation_id": "conv-existing"}, - raise_on_error=False, - ) - - text = _text(result) - assert "24-character hex Mongo ObjectId" in text - assert "Retry: no" in text - - @pytest.mark.asyncio - async def test_empty_question_returns_error(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", {"question": ""}, - raise_on_error=False, - ) - - text = _text(result) - assert "error" in text.lower() or "question" in text.lower() - - @pytest.mark.asyncio - async def test_backend_error_handled(self): - mcp = _server({ - "/api/chat/completions": lambda r: httpx.Response(401, text="unauthorized"), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", - {"question": "hello"}, - raise_on_error=False, - ) - - text = _text(result) - assert "401" in text or "auth" in text.lower() - - @pytest.mark.asyncio - async def test_problem_details_backend_error_keeps_detail_and_request_id(self): - problem = { - "type": "https://app.codealive.ai/errors/bad-request", - "title": "Bad request", - "status": 400, - "detail": "Message content violates our content policy", - "requestId": "req-rest", - } - - mcp = _server({ - "/api/chat/completions": lambda r: httpx.Response( - 400, - json=problem, - headers={"content-type": "application/problem+json"}, - ), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", - {"question": "hello"}, - raise_on_error=False, - ) - - text = _text(result) - assert "Message content violates our content policy" in text - assert "requestId=req-rest" in text - assert "Retry: no" in text - - @pytest.mark.asyncio - async def test_named_sse_problem_details_error_returns_tool_error(self): - problem = json.dumps({ - "type": "https://app.codealive.ai/errors/bad-request", - "title": "Bad request", - "status": 400, - "detail": "Message content violates our content policy", - "requestId": "req-sse", - }) - body = f"event: error\ndata: {problem}\n\n" - - mcp = _server({ - "/api/chat/completions": lambda r: httpx.Response( - 200, - text=body, - headers={"content-type": "text/event-stream"}, - ), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", - {"question": "hello", "data_sources": ["backend"]}, - raise_on_error=False, - ) - - text = _text(result) - assert "Message content violates our content policy" in text - assert "Code: 400" in text - assert "requestId=req-sse" in text - assert "Retry: no" in text - - @pytest.mark.asyncio - async def test_named_sse_rate_limit_error_is_retryable(self): - problem = json.dumps({ - "type": "https://app.codealive.ai/errors/plan-limit", - "title": "Plan limit", - "status": 429, - "detail": "Chat completion rate limit exceeded", - "requestId": "req-sse-429", - }) - body = f"event: error\ndata: {problem}\n\n" - - mcp = _server({ - "/api/chat/completions": lambda r: httpx.Response( - 200, - text=body, - headers={"content-type": "text/event-stream"}, - ), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", - {"question": "hello", "data_sources": ["backend"]}, - raise_on_error=False, - ) - - text = _text(result) - assert "Chat completion rate limit exceeded" in text - assert "Retry: yes" in text - assert "back off" in text - assert "requestId=req-sse-429" in text - - @pytest.mark.asyncio - async def test_unicode_preserved_in_streamed_response(self): - """Cyrillic chunks streamed via SSE must survive as UTF-8 in the final text.""" - body = self._sse_body(["Привет, ", "мир!"]) - - mcp = _server({ - "/api/chat/completions": lambda r: httpx.Response( - 200, text=body, headers={"content-type": "text/event-stream"}, - ), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "chat", - {"question": "Как работает аутентификация?", "data_sources": ["backend"]}, - ) - - text = _text(result) - assert "Привет, мир!" in text - assert "\\u04" not in text - - @pytest.mark.asyncio - async def test_legacy_alias_still_works(self): - body = self._sse_body(["Legacy alias"]) - - def handler(req): - assert req.headers["X-CodeAlive-Tool"] == "codebase_consultant" - return httpx.Response(200, text=body, headers={"content-type": "text/event-stream"}) - - mcp = _server({"/api/chat/completions": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "codebase_consultant", - {"question": "How does auth work?", "data_sources": ["backend"]}, - ) - - assert "Legacy alias" in _text(result) - - -# --------------------------------------------------------------------------- -# get_artifact_relationships -# --------------------------------------------------------------------------- - -class TestGetArtifactRelationshipsE2E: - _RELATIONSHIPS_RESPONSE = { - "sourceIdentifier": "org/repo::src/svc.py::Service", - "profile": "CallsOnly", - "found": True, - "availableRelationshipCounts": { - "outgoingCalls": 3, - "incomingCalls": 1, - "ancestors": 0, - "descendants": 0, - "references": 2, - }, - "relationships": [ - { - "relationType": "OutgoingCalls", - "totalCount": 3, - "returnedCount": 3, - "truncated": False, - "items": [ - {"identifier": "org/repo::src/db.py::query", "filePath": "src/db.py", "startLine": 42}, - {"identifier": "org/repo::src/cache.py::get", "filePath": "src/cache.py", "startLine": 10, - "shortSummary": "Cache lookup"}, - ], - }, - { - "relationType": "IncomingCalls", - "totalCount": 1, - "returnedCount": 1, - "truncated": False, - "items": [ - {"identifier": "org/repo::src/main.py::run", "filePath": "src/main.py", "startLine": 5}, - ], - }, - ], - } - - @pytest.mark.asyncio - async def test_success_returns_compact_json(self): - def handler(req): - body = json.loads(req.content) - assert body["identifier"] == "org/repo::src/svc.py::Service" - assert body["profile"] == "CallsOnly" - return httpx.Response(200, json=self._RELATIONSHIPS_RESPONSE) - - mcp = _server({"/api/search/artifact-relationships": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::src/svc.py::Service", "profile": "callsOnly"}, - ) - - text = _text(result) - data = json.loads(text) - # FastMCP serializes via pydantic_core.to_json — compact, UTF-8. - assert text == json.dumps(data, separators=(",", ":"), ensure_ascii=False) - assert data["found"] is True - assert data["availableRelationshipCounts"]["references"] == 2 - assert "Fetch promising related artifacts" in data["hint"] - types = [g["type"] for g in data["relationships"]] - assert "outgoing_calls" in types - assert "incoming_calls" in types - outgoing_items = data["relationships"][0]["items"] - assert any(item.get("shortSummary") == "Cache lookup" for item in outgoing_items) - assert any(item.get("filePath") == "src/db.py" for item in outgoing_items) - - @pytest.mark.asyncio - async def test_not_found(self): - response_data = { - "sourceIdentifier": "org/repo::missing", - "profile": "CallsOnly", - "found": False, - } - mcp = _server({ - "/api/search/artifact-relationships": lambda r: httpx.Response(200, json=response_data), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::missing"}, - ) - - data = json.loads(_text(result)) - assert data["found"] is False - assert "relationships" not in data - - @pytest.mark.asyncio - async def test_invalid_profile_returns_error(self): - """Pydantic rejects invalid Literal values before the function body runs.""" - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::x", "profile": "bogus"}, - raise_on_error=False, - ) - - text = _text(result) - # Pydantic Literal validation fires before the function body, producing - # a human-readable validation error (not our custom JSON). - assert "callsOnly" in text - assert "literal_error" in text or "Input should be" in text - - @pytest.mark.asyncio - async def test_invalid_profile_is_logged_with_arguments_by_middleware(self): - """FastMCP validation fails before the tool body, so middleware must capture args.""" - mcp = _server({}) - mcp.add_middleware(ObservabilityMiddleware()) - records = [] - handler_id = logger.add(lambda message: records.append(message.record), level="DEBUG") - - try: - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::x", "profile": "bogus"}, - raise_on_error=False, - ) - finally: - logger.remove(handler_id) - - assert result.is_error - failures = [ - record for record in records - if record["message"] == "Tool call failed: get_artifact_relationships" - ] - assert len(failures) == 1 - failure = failures[0] - assert failure["level"].name == "WARNING" - assert failure["extra"]["tool_arguments"] == { - "identifier": "org/repo::x", - "profile": "bogus", - } - assert failure["extra"]["error_type"] == "ValidationError" - - @pytest.mark.asyncio - async def test_empty_identifier_returns_error(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": ""}, - raise_on_error=False, - ) - - assert result.is_error - assert "required" in _text(result).lower() - - @pytest.mark.asyncio - async def test_max_count_per_type_0_rejected(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::x", "max_count_per_type": 0}, - raise_on_error=False, - ) - assert result.is_error - assert "max_count_per_type" in _text(result) - - @pytest.mark.asyncio - async def test_max_count_per_type_1001_rejected(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::x", "max_count_per_type": 1001}, - raise_on_error=False, - ) - assert result.is_error - assert "max_count_per_type" in _text(result) - - @pytest.mark.asyncio - async def test_max_count_per_type_negative_rejected(self): - mcp = _server({}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::x", "max_count_per_type": -1}, - raise_on_error=False, - ) - assert result.is_error - assert "max_count_per_type" in _text(result) - - @pytest.mark.asyncio - async def test_max_count_per_type_forwarded(self): - response_data = { - "sourceIdentifier": "org/repo::src/svc.py::run", - "profile": "CallsOnly", - "found": True, - "relationships": [], - } - - def handler(req): - body = json.loads(req.content) - assert body["maxCountPerType"] == 3 - return httpx.Response(200, json=response_data) - - mcp = _server({"/api/search/artifact-relationships": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::src/svc.py::run", "max_count_per_type": 3}, - ) - - data = json.loads(_text(result)) - assert data["found"] is True - - @pytest.mark.asyncio - async def test_all_relevant_profile(self): - response_data = { - "sourceIdentifier": "org/repo::cls", - "profile": "AllRelevant", - "found": True, - "relationships": [ - {"relationType": "OutgoingCalls", "totalCount": 0, "returnedCount": 0, "truncated": False, "items": []}, - {"relationType": "IncomingCalls", "totalCount": 0, "returnedCount": 0, "truncated": False, "items": []}, - {"relationType": "Ancestors", "totalCount": 1, "returnedCount": 1, "truncated": False, "items": [{"identifier": "org/repo::Base"}]}, - {"relationType": "Descendants", "totalCount": 0, "returnedCount": 0, "truncated": False, "items": []}, - ], - } - - def handler(req): - body = json.loads(req.content) - assert body["profile"] == "AllRelevant" - return httpx.Response(200, json=response_data) - - mcp = _server({"/api/search/artifact-relationships": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::cls", "profile": "allRelevant"}, - ) - - data = json.loads(_text(result)) - assert data["profile"] == "allRelevant" - types = [g["type"] for g in data["relationships"]] - assert "outgoing_calls" in types - assert "incoming_calls" in types - assert "ancestors" in types - assert "descendants" in types - - @pytest.mark.asyncio - async def test_references_profile(self): - response_data = { - "sourceIdentifier": "org/repo::var", - "profile": "ReferencesOnly", - "found": True, - "relationships": [ - {"relationType": "References", "totalCount": 5, "returnedCount": 5, "truncated": False, "items": [ - {"identifier": "org/repo::src/a.py::func_a", "filePath": "src/a.py", "startLine": 10} - ]}, - ], - } - - def handler(req): - body = json.loads(req.content) - assert body["profile"] == "ReferencesOnly" - return httpx.Response(200, json=response_data) - - mcp = _server({"/api/search/artifact-relationships": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::var", "profile": "referencesOnly"}, - ) - - data = json.loads(_text(result)) - assert data["profile"] == "referencesOnly" - assert data["relationships"][0]["type"] == "references" - assert data["relationships"][0]["totalCount"] == 5 - - @pytest.mark.asyncio - async def test_unicode_preserved_in_response(self): - """Cyrillic in identifiers/summaries must survive as UTF-8, not \\uXXXX.""" - response_data = { - "sourceIdentifier": "org/repo::файл.cs::Класс.Метод", - "profile": "CallsOnly", - "found": True, - "relationships": [ - { - "relationType": "OutgoingCalls", - "totalCount": 1, - "returnedCount": 1, - "truncated": False, - "items": [{"identifier": "org/repo::другой.cs::Метод2", - "filePath": "другой.cs", - "shortSummary": "Описание метода"}], - } - ], - } - mcp = _server({ - "/api/search/artifact-relationships": lambda r: httpx.Response(200, json=response_data), - }) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::файл.cs::Класс.Метод"}, - ) - - text = _text(result) - assert text == json.dumps(json.loads(text), separators=(",", ":"), ensure_ascii=False) - assert "Класс.Метод" in text - assert "Описание метода" in text - assert "\\u04" not in text - - @pytest.mark.asyncio - async def test_inheritance_profile_maps_correctly(self): - response_data = { - "sourceIdentifier": "org/repo::cls", - "profile": "InheritanceOnly", - "found": True, - "relationships": [ - { - "relationType": "Ancestors", - "totalCount": 1, - "returnedCount": 1, - "truncated": False, - "items": [{"identifier": "org/repo::Base", "filePath": "base.py", "startLine": 1}], - } - ], - } - - def handler(req): - body = json.loads(req.content) - assert body["profile"] == "InheritanceOnly" - return httpx.Response(200, json=response_data) - - mcp = _server({"/api/search/artifact-relationships": handler}) - async with Client(mcp) as client: - result = await client.call_tool( - "get_artifact_relationships", - {"identifier": "org/repo::cls", "profile": "inheritanceOnly"}, - ) - - data = json.loads(_text(result)) - assert data["profile"] == "inheritanceOnly" - assert data["relationships"][0]["type"] == "ancestors" diff --git a/src/tests/test_error_handling.py b/src/tests/test_error_handling.py index 471e849..7a1b7b4 100644 --- a/src/tests/test_error_handling.py +++ b/src/tests/test_error_handling.py @@ -79,7 +79,7 @@ async def test_handle_500_server_error(): @pytest.mark.asyncio async def test_handle_422_data_source_not_ready(): - """422 errors must point at get_data_sources(alive_only=false).""" + """422 errors must point at get_data_sources(ready_only=false).""" ctx = MagicMock() ctx.error = AsyncMock() @@ -88,7 +88,7 @@ async def test_handle_422_data_source_not_ready(): error_msg = ctx.error.call_args[0][0] assert "Retry: yes" in error_msg - assert "alive_only=false" in error_msg + assert "ready_only=false" in error_msg @pytest.mark.asyncio @@ -170,7 +170,7 @@ async def test_recovery_hints_override_default_404(): ctx = MagicMock() ctx.error = AsyncMock() - custom = "(1) check conversation_id, (2) drop conversation_id and retry" + custom = "(1) include prior context in the stateless chat question, (2) retry without legacy conversation fields" with pytest.raises(ToolError) as exc_info: await handle_api_error( ctx, _make_http_error(404), "chat", diff --git a/src/tests/test_fetch_artifacts.py b/src/tests/test_fetch_artifacts.py deleted file mode 100644 index 209c154..0000000 --- a/src/tests/test_fetch_artifacts.py +++ /dev/null @@ -1,794 +0,0 @@ -"""Test suite for fetch_artifacts tool.""" - -import pytest -from unittest.mock import AsyncMock, MagicMock, patch -from fastmcp import Context -from fastmcp.exceptions import ToolError -from tools.fetch_artifacts import ( - _add_line_numbers, - _build_artifacts_xml, - _has_any_calls, - fetch_artifacts, -) - - -class TestAddLineNumbers: - """Test cases for _add_line_numbers helper.""" - - def test_multi_line_content(self): - content = "line1\nline2\nline3" - result = _add_line_numbers(content) - assert result == "1 | line1\n2 | line2\n3 | line3" - - def test_single_line_content(self): - content = "only one line" - result = _add_line_numbers(content) - assert result == "1 | only one line" - - def test_empty_content(self): - assert _add_line_numbers("") == "" - - def test_right_aligned_padding(self): - lines = "\n".join(f"line{i}" for i in range(100)) - result = _add_line_numbers(lines) - first_line = result.split("\n")[0] - assert first_line == " 1 | line0" - last_line = result.split("\n")[99] - assert last_line == "100 | line99" - - def test_start_line_offset(self): - result = _add_line_numbers("a\nb", start_line=50) - assert result == "50 | a\n51 | b" - - def test_start_line_default(self): - result = _add_line_numbers("x", start_line=1) - assert result == "1 | x" - - def test_start_line_right_aligned_padding(self): - result = _add_line_numbers("a\nb\nc", start_line=98) - assert result == " 98 | a\n 99 | b\n100 | c" - - def test_start_line_empty_content(self): - assert _add_line_numbers("", start_line=50) == "" - - -class TestBuildArtifactsXmlStartLine: - """Test _build_artifacts_xml uses startLine from API response.""" - - def test_artifact_with_start_line(self): - data = {"artifacts": [ - {"identifier": "repo::file.py::func", "content": "line1\nline2", "contentByteSize": 10, "startLine": 50} - ]} - result = _build_artifacts_xml(data) - assert "50 | line1" in result - assert "51 | line2" in result - - def test_artifact_without_start_line_defaults_to_1(self): - data = {"artifacts": [ - {"identifier": "repo::file.py::func", "content": "line1\nline2", "contentByteSize": 10} - ]} - result = _build_artifacts_xml(data) - assert "1 | line1" in result - assert "2 | line2" in result - - def test_artifact_with_null_start_line_defaults_to_1(self): - data = {"artifacts": [ - {"identifier": "repo::file.py", "content": "hello", "contentByteSize": 5, "startLine": None} - ]} - result = _build_artifacts_xml(data) - assert "1 | hello" in result - - -class TestBuildArtifactsXmlContentWrapper: - """Test that content is wrapped in element with newlines around it.""" - - def test_content_wrapped_in_element_with_newlines(self): - data = {"artifacts": [ - {"identifier": "repo::file.py::func", "content": "code here", "contentByteSize": 9} - ]} - result = _build_artifacts_xml(data) - assert "" in result - assert "" in result - # Content lives on its own line(s) between the open and close tags - assert "\n1 | code here\n " in result - - def test_artifact_structure_has_content_child(self): - data = {"artifacts": [ - {"identifier": "repo::f.py::fn", "content": "x = 1", "contentByteSize": 5} - ]} - result = _build_artifacts_xml(data) - assert "" in result - assert "" in result - - def test_content_is_not_html_escaped(self): - """Quotes, ampersands, and angle brackets are kept as-is inside .""" - data = {"artifacts": [ - {"identifier": "repo::f.py::fn", - "content": 'if x < 10 && y > 5: return ""', - "contentByteSize": 32} - ]} - result = _build_artifacts_xml(data) - # Raw characters preserved - assert 'if x < 10 && y > 5: return ""' in result - # No HTML escaping - assert "<" not in result - assert "&" not in result - assert """ not in result - - -class TestBuildArtifactsXmlRelationships: - """Test relationships rendering in _build_artifacts_xml.""" - - def test_relationships_with_outgoing_and_incoming(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 12, - "outgoingCalls": [ - {"identifier": "repo::src/b.ts::FuncB", "summary": "Validates token"}, - {"identifier": "repo::src/c.ts::FuncC", "summary": "Logs event"}, - ], - "incomingCallsCount": 3, - "incomingCalls": [ - {"identifier": "repo::src/d.ts::FuncD", "summary": "Entry point"}, - ], - } - }]} - result = _build_artifacts_xml(data) - assert "" in result - assert '' in result - assert '' in result - assert '' in result - assert '' in result - assert '' in result - assert 'identifier="repo::src/b.ts::FuncB" summary="Validates token"' in result - assert 'identifier="repo::src/d.ts::FuncD" summary="Entry point"' in result - - def test_relationships_with_only_outgoing(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 2, - "outgoingCalls": [ - {"identifier": "repo::src/b.ts::FuncB", "summary": "Does stuff"}, - ], - "incomingCallsCount": None, - "incomingCalls": None, - } - }]} - result = _build_artifacts_xml(data) - assert "" in result - assert "" not in result - assert "" in result - - def test_relationships_absent_omits_relationships_element(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts", - "content": "code", - "contentByteSize": 4, - }]} - result = _build_artifacts_xml(data) - assert "" not in result - - def test_relationships_call_without_summary_omits_summary_attr(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 1, - "outgoingCalls": [ - {"identifier": "repo::src/b.ts::FuncB", "summary": None}, - ], - "incomingCallsCount": None, - "incomingCalls": None, - } - }]} - result = _build_artifacts_xml(data) - assert 'identifier="repo::src/b.ts::FuncB"/>' in result - assert 'summary' not in result.split('FuncB')[1].split('/>')[0] - - def test_relationships_summary_xml_escaped(self): - # Summaries are AI-generated text placed in an XML *attribute*; like identifiers they must - # be escaped so a crafted summary cannot break out of the attribute and inject pseudo-XML - # into the model context. (Source-code *content* stays raw — that is the body, - # not an attribute; see test_content_is_not_html_escaped.) - data = {"artifacts": [{ - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 1, - "outgoingCalls": [ - {"identifier": "repo::src/b.ts::FuncB", "summary": 'Checks if x < 10 & y > 5'}, - ], - "incomingCallsCount": None, - "incomingCalls": None, - } - }]} - result = _build_artifacts_xml(data) - # Special chars in the attribute are escaped, and the raw unescaped form is gone. - assert "<" in result - assert "&" in result - assert ">" in result - assert "x < 10 & y > 5" not in result - - def test_relationships_summary_injection_is_neutralized(self): - # A crafted summary must not break out of the attribute and inject pseudo-XML. - data = {"artifacts": [{ - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 1, - "outgoingCalls": [ - {"identifier": "repo::src/b.ts::FuncB", "summary": '"/>x" not in result - - -class TestHasAnyCalls: - """Test cases for _has_any_calls helper.""" - - def test_outgoing_calls_present(self): - assert _has_any_calls({"outgoingCallsCount": 5, "incomingCallsCount": 0}) is True - - def test_incoming_calls_present(self): - assert _has_any_calls({"outgoingCallsCount": 0, "incomingCallsCount": 2}) is True - - def test_both_zero(self): - assert _has_any_calls({"outgoingCallsCount": 0, "incomingCallsCount": 0}) is False - - def test_both_none(self): - assert _has_any_calls({"outgoingCallsCount": None, "incomingCallsCount": None}) is False - - def test_empty_dict(self): - assert _has_any_calls({}) is False - - -class TestBuildArtifactsXmlHint: - """Test the trailing hint that points to get_artifact_relationships.""" - - HINT_MARKER = "get_artifact_relationships" - - def test_hint_present_when_outgoing_calls_exist(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 12, - "outgoingCalls": [ - {"identifier": "repo::src/b.ts::FuncB", "summary": "Validates"}, - ], - "incomingCallsCount": 0, - "incomingCalls": None, - } - }]} - result = _build_artifacts_xml(data) - assert "" in result - assert self.HINT_MARKER in result - assert "" in result - # Hint must appear after relationships and before closing - assert result.index("") > result.index("") - assert result.index("") < result.index("") - - def test_hint_present_when_only_incoming_calls_exist(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 0, - "outgoingCalls": None, - "incomingCallsCount": 1, - "incomingCalls": [ - {"identifier": "repo::src/d.ts::FuncD", "summary": "Calls A"}, - ], - } - }]} - result = _build_artifacts_xml(data) - assert "" in result - assert self.HINT_MARKER in result - - def test_hint_absent_when_relationships_missing(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts", - "content": "code", - "contentByteSize": 4, - }]} - result = _build_artifacts_xml(data) - assert "" not in result - assert self.HINT_MARKER not in result - - def test_hint_absent_when_relationships_null(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts", - "content": "code", - "contentByteSize": 4, - "relationships": None, - }]} - result = _build_artifacts_xml(data) - assert "" not in result - - def test_hint_absent_when_all_call_counts_are_zero(self): - data = {"artifacts": [{ - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 0, - "outgoingCalls": None, - "incomingCallsCount": 0, - "incomingCalls": None, - } - }]} - result = _build_artifacts_xml(data) - assert "" not in result - - def test_hint_appears_once_with_multiple_artifacts(self): - data = {"artifacts": [ - { - "identifier": "repo::src/a.ts::FuncA", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 2, - "outgoingCalls": [ - {"identifier": "repo::src/b.ts::FuncB", "summary": "X"}, - ], - "incomingCallsCount": 0, - "incomingCalls": None, - } - }, - { - "identifier": "repo::src/c.ts::FuncC", - "content": "code", - "contentByteSize": 4, - "relationships": { - "outgoingCallsCount": 0, - "outgoingCalls": None, - "incomingCallsCount": 3, - "incomingCalls": [ - {"identifier": "repo::src/d.ts::FuncD", "summary": "Y"}, - ], - } - }, - ]} - result = _build_artifacts_xml(data) - assert result.count("") == 1 - assert result.count(self.HINT_MARKER) == 1 - - def test_relationships_hint_absent_but_missing_surfaced_when_no_content(self): - # A not-found artifact produces no relationships preview hint (nothing to drill - # into), but it must still be surfaced — not silently dropped. - data = {"artifacts": [ - {"identifier": "repo::missing.ts::Func", "content": None, "contentByteSize": None}, - ]} - result = _build_artifacts_xml(data) - assert self.HINT_MARKER not in result - assert "" in result - assert "backend" in result - # Guides toward the two recovery moves. - assert "data_source" in result - assert "omit" in result.lower() - - def test_hint_when_empty_artifacts_and_data_source(self): - result = _build_artifacts_xml({"artifacts": []}, data_source="ds-main") - assert "ds-main" in result and "" in result - - def test_no_miss_hint_when_data_source_resolved_content(self): - data = {"artifacts": [ - {"identifier": "repo::a.ts::F", "content": "code", "contentByteSize": 4}, - ]} - result = _build_artifacts_xml(data, data_source="backend") - assert "omit data_source" not in result - - def test_no_data_source_miss_hint_without_data_source(self): - # Without a data_source selector there is no data-source-specific recovery hint, - # but the missing artifact is still surfaced in a block. - data = {"artifacts": [ - {"identifier": "repo::a.ts::F", "content": None, "contentByteSize": None}, - ]} - result = _build_artifacts_xml(data) - assert "omit data_source" not in result - assert "' in result - assert 'identifier="repo::missing.ts::G"' in result - # the found sibling is still rendered as a normal artifact - assert '' in result - assert 'identifier="repo::ghost.ts::Z"' in result - - def test_not_found_count_matches_rows(self): - data = {"artifacts": [ - {"identifier": "repo::m1::A", "found": False, "content": None}, - {"identifier": "repo::m2::B", "found": False, "content": None}, - ]} - result = _build_artifacts_xml(data) - assert '' in result - assert 'identifier="repo::m1::A"' in result - assert 'identifier="repo::m2::B"' in result - - def test_identifiers_are_xml_escaped(self): - # Crafted identifiers (caller/LLM-supplied, and any unmatched requested string lands in - # via the backstop) must not break out of the XML attribute and inject - # pseudo-XML into the model context — neither in nor . - data = {"artifacts": [ - {"identifier": 'r::ok">::F', "found": True, "content": "code", "contentByteSize": 4}, - {"identifier": 'r::bad">::G', "found": False, "content": None}, - ]} - result = _build_artifacts_xml(data) - assert "" not in result - assert "" not in result - assert ""><injected>" in result - assert ""><x>" in result - - -@pytest.mark.asyncio -@patch('tools.fetch_artifacts.get_api_key_from_context') -async def test_fetch_artifacts_returns_xml(mock_get_api_key): - """Test that fetch_artifacts returns properly formatted XML.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = { - "artifacts": [ - { - "identifier": "owner/repo::src/auth.py::login", - "content": "def login(user, pwd):\n return True", - "contentByteSize": 38 - }, - { - "identifier": "owner/repo::src/missing.py::func", - "content": None, - "contentByteSize": None - } - ] - } - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - ctx.request_context.headers = {"authorization": "Bearer test_key"} - - result = await fetch_artifacts( - ctx=ctx, - identifiers=["owner/repo::src/auth.py::login", "owner/repo::src/missing.py::func"], - ) - - assert isinstance(result, str) - assert "" in result - assert "" in result - # Found artifact has line-numbered content wrapped in - assert "" in result - assert "1 | def login(user, pwd):" in result - assert "2 | return True" in result - assert 'contentByteSize="38"' in result - assert 'identifier="owner/repo::src/auth.py::login"' in result - # Not-found artifact is surfaced in a block, not silently dropped. - assert " the field is omitted (preserves the 409-on-ambiguity fallback). - assert "dataSource" not in body - - -@pytest.mark.asyncio -@patch('tools.fetch_artifacts.get_api_key_from_context') -async def test_fetch_artifacts_forwards_data_source(mock_get_api_key): - """data_source (Name or Id) is forwarded as the DataSource body field when provided.""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = {"artifacts": []} - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - ctx.request_context.headers = {"authorization": "Bearer test_key"} - - await fetch_artifacts( - ctx=ctx, - identifiers=["id1"], - data_source="backend", - ) - - body = mock_client.post.call_args.kwargs["json"] - assert body["dataSource"] == "backend" - - -@pytest.mark.asyncio -@patch('tools.fetch_artifacts.get_api_key_from_context') -async def test_fetch_artifacts_whitespace_data_source_omitted(mock_get_api_key): - """A whitespace-only data_source normalizes to None: not sent to the backend - and not echoed in the not-found hint (preserves the 409-on-ambiguity fallback).""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = {"artifacts": []} - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - ctx.request_context.headers = {"authorization": "Bearer test_key"} - - result = await fetch_artifacts( - ctx=ctx, - identifiers=["id1"], - data_source=" ", - ) - - body = mock_client.post.call_args.kwargs["json"] - assert "dataSource" not in body - # The confusing `... data source " "` hint must not appear. - assert '" "' not in result - - -@pytest.mark.asyncio -@patch('tools.fetch_artifacts.get_api_key_from_context') -async def test_fetch_artifacts_api_error(mock_get_api_key): - """Test that API errors are handled gracefully.""" - import httpx - - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.status_code = 500 - mock_response.text = "Internal server error" - - def raise_500(): - raise httpx.HTTPStatusError( - "Server error", - request=MagicMock(), - response=mock_response - ) - - mock_response.raise_for_status = raise_500 - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - ctx.request_context.headers = {"authorization": "Bearer test_key"} - - with pytest.raises(ToolError, match="Server error \\(500\\)"): - await fetch_artifacts( - ctx=ctx, - identifiers=["some-id"], - ) - - -@pytest.mark.asyncio -@patch('tools.fetch_artifacts.get_api_key_from_context') -async def test_fetch_artifacts_keeps_content_raw(mock_get_api_key): - """Test that XML special chars in content are emitted as-is (no HTML escaping).""" - mock_get_api_key.return_value = "test_key" - - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_response = MagicMock() - mock_response.json.return_value = { - "artifacts": [ - { - "identifier": "owner/repo::file.py::func", - "content": 'if x < 10 && y > 5:\n return ""', - "contentByteSize": 40 - } - ] - } - mock_response.raise_for_status = MagicMock() - - mock_client = AsyncMock() - mock_client.post.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - ctx.request_context.headers = {"authorization": "Bearer test_key"} - - result = await fetch_artifacts( - ctx=ctx, - identifiers=["owner/repo::file.py::func"], - ) - - # Line numbers are added but no escaping - assert '1 | if x < 10 && y > 5:' in result - assert '2 | return ""' in result - # No HTML escaping - assert "<" not in result - assert "&" not in result - assert """ not in result - # Structure is preserved with newline-bracketed content body - assert "" in result - assert "" in result - assert "\n" in result - assert "\n " in result diff --git a/src/tests/test_observability_middleware.py b/src/tests/test_observability_middleware.py index 2bb4865..f53967e 100644 --- a/src/tests/test_observability_middleware.py +++ b/src/tests/test_observability_middleware.py @@ -50,7 +50,7 @@ def otel_setup(): provider.shutdown() -def _make_context(tool_name: str = "codebase_search", arguments: dict | None = None): +def _make_context(tool_name: str = "semantic_search", arguments: dict | None = None): ctx = MagicMock() ctx.message.name = tool_name ctx.message.arguments = arguments or {} @@ -65,7 +65,7 @@ class TestSuccessfulToolCall: @pytest.mark.asyncio async def test_returns_result_from_call_next(self, otel_setup): middleware = ObservabilityMiddleware() - context = _make_context("codebase_search") + context = _make_context("semantic_search") call_next = AsyncMock(return_value="xml") result = await middleware.on_call_tool(context, call_next) diff --git a/src/tests/test_response_transformer.py b/src/tests/test_response_transformer.py index 52058ca..2a7538a 100644 --- a/src/tests/test_response_transformer.py +++ b/src/tests/test_response_transformer.py @@ -255,7 +255,7 @@ def test_data_preservation(self): "results": [ { "kind": "Symbol", - "identifier": "CodeAlive-AI/codealive-mcp::src/tools/search.py::codebase_search", + "identifier": "CodeAlive-AI/codealive-mcp::src/tools/search.py::semantic_search", "location": { "path": "src/tools/search.py", "range": {"start": {"line": 18}, "end": {"line": 168}} @@ -291,7 +291,7 @@ def test_data_preservation(self): assert first["startLine"] == 18 assert first["endLine"] == 168 assert first["kind"] == "Symbol" - assert first["identifier"] == "CodeAlive-AI/codealive-mcp::src/tools/search.py::codebase_search" + assert first["identifier"] == "CodeAlive-AI/codealive-mcp::src/tools/search.py::semantic_search" assert first["contentByteSize"] == 8500 assert first["description"] == "Main search function" # Data-source identity must be surfaced (not stripped) so the agent can feed it back diff --git a/src/tests/test_search_tool.py b/src/tests/test_search_tool.py deleted file mode 100644 index 42f930d..0000000 --- a/src/tests/test_search_tool.py +++ /dev/null @@ -1,197 +0,0 @@ -"""Test suite for semantic, grep, and legacy search tools.""" - -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -from fastmcp import Context -from fastmcp.exceptions import ToolError - -from tools.search import codebase_search, grep_search, semantic_search - - -def _build_context(mock_response): - ctx = MagicMock(spec=Context) - ctx.info = AsyncMock() - ctx.warning = AsyncMock() - ctx.error = AsyncMock() - - mock_client = AsyncMock() - mock_client.get.return_value = mock_response - - mock_codealive_context = MagicMock() - mock_codealive_context.client = mock_client - mock_codealive_context.base_url = "https://app.codealive.ai" - - ctx.request_context.lifespan_context = mock_codealive_context - ctx.request_context.headers = {"authorization": "Bearer test_key"} - return ctx, mock_client - - -@pytest.mark.asyncio -@patch("tools.search.get_api_key_from_context") -async def test_semantic_search_returns_compact_json(mock_get_api_key): - mock_get_api_key.return_value = "test_key" - - mock_response = MagicMock() - mock_response.json.return_value = { - "results": [ - { - "kind": "Symbol", - "identifier": "owner/repo::path/auth.py::authenticate_user", - "location": { - "path": "path/auth.py", - "range": {"start": {"line": 10}, "end": {"line": 25}}, - }, - "description": "Authenticates a user with credentials", - } - ] - } - mock_response.raise_for_status = MagicMock() - - ctx, mock_client = _build_context(mock_response) - - result = await semantic_search( - ctx=ctx, - query="authenticate_user", - data_sources=["test-name"], - paths=["src/auth.py"], - extensions=[".py"], - max_results=7, - ) - - assert isinstance(result, dict) - assert result["results"][0]["path"] == "path/auth.py" - assert result["results"][0]["identifier"] == "owner/repo::path/auth.py::authenticate_user" - - call_args = mock_client.get.call_args - assert call_args.args[0] == "/api/search/semantic" - params = call_args.kwargs["params"] - assert ("Query", "authenticate_user") in params - assert ("Names", "test-name") in params - assert ("Paths", "src/auth.py") in params - assert ("Extensions", ".py") in params - assert ("MaxResults", "7") in params - assert call_args.kwargs["headers"]["X-CodeAlive-Tool"] == "semantic_search" - - -@pytest.mark.asyncio -@patch("tools.search.get_api_key_from_context") -async def test_grep_search_returns_matches(mock_get_api_key): - mock_get_api_key.return_value = "test_key" - - mock_response = MagicMock() - mock_response.json.return_value = { - "results": [ - { - "kind": "File", - "identifier": "owner/repo::path/auth.py", - "location": { - "path": "path/auth.py", - "range": {"start": {"line": 15}}, - }, - "matchCount": 2, - "matches": [ - { - "lineNumber": 15, - "startColumn": 5, - "endColumn": 12, - "lineText": "token = auth()", - } - ], - } - ] - } - mock_response.raise_for_status = MagicMock() - - ctx, mock_client = _build_context(mock_response) - - result = await grep_search( - ctx=ctx, - query="auth\\(", - data_sources=["test-name"], - regex=True, - ) - - assert isinstance(result, dict) - assert result["results"][0]["matchCount"] == 2 - assert result["results"][0]["matches"][0]["lineNumber"] == 15 - - call_args = mock_client.get.call_args - assert call_args.args[0] == "/api/search/grep" - params = call_args.kwargs["params"] - assert ("Regex", "true") in params - assert call_args.kwargs["headers"]["X-CodeAlive-Tool"] == "grep_search" - - -@pytest.mark.asyncio -@patch("tools.search.get_api_key_from_context") -async def test_codebase_search_keeps_legacy_params(mock_get_api_key): - mock_get_api_key.return_value = "test_key" - - mock_response = MagicMock() - mock_response.json.return_value = {"results": []} - mock_response.raise_for_status = MagicMock() - - ctx, mock_client = _build_context(mock_response) - - await codebase_search( - ctx=ctx, - query="test", - data_sources=["test-name"], - mode="deep", - description_detail="full", - ) - - call_args = mock_client.get.call_args - assert call_args.args[0] == "/api/search" - params = call_args.kwargs["params"] - assert ("Mode", "deep") in params - assert ("DescriptionDetail", "Full") in params - assert ("IncludeContent", "false") in params - assert call_args.kwargs["headers"]["X-CodeAlive-Tool"] == "codebase_search" - - -@pytest.mark.asyncio -async def test_semantic_search_empty_query_raises_tool_error(): - ctx = MagicMock(spec=Context) - - with pytest.raises(ToolError, match="Query cannot be empty"): - await semantic_search(ctx=ctx, query="") - - -@pytest.mark.asyncio -async def test_grep_search_invalid_max_results_raises_tool_error(): - ctx = MagicMock(spec=Context) - - with pytest.raises(ToolError, match="max_results"): - await grep_search(ctx=ctx, query="foo", max_results=501) - - -@pytest.mark.asyncio -@patch("tools.search.get_api_key_from_context") -async def test_codebase_search_api_error_raises_tool_error(mock_get_api_key): - import httpx - - mock_get_api_key.return_value = "test_key" - - mock_response = MagicMock() - mock_response.status_code = 404 - mock_response.text = "Not found" - - def raise_404(): - raise httpx.HTTPStatusError( - "Not found", - request=MagicMock(), - response=mock_response, - ) - - mock_response.raise_for_status = raise_404 - ctx, mock_client = _build_context(mock_response) - mock_client.get.return_value = mock_response - - with pytest.raises(ToolError, match="404"): - await codebase_search( - ctx=ctx, - query="test query", - data_sources=["invalid-name"], - ) diff --git a/src/tests/test_stdio_smoke.py b/src/tests/test_stdio_smoke.py index d8c574f..65af9c3 100644 --- a/src/tests/test_stdio_smoke.py +++ b/src/tests/test_stdio_smoke.py @@ -19,7 +19,9 @@ def _mock_codealive_server(): requests = [] class Handler(BaseHTTPRequestHandler): - def do_GET(self): + def do_POST(self): + length = int(self.headers.get("Content-Length", "0")) + body = self.rfile.read(length).decode("utf-8") if length else "{}" requests.append( { "path": self.path, @@ -27,26 +29,32 @@ def do_GET(self): "tool": self.headers.get("X-CodeAlive-Tool"), "integration": self.headers.get("X-CodeAlive-Integration"), "client": self.headers.get("X-CodeAlive-Client"), + "body": json.loads(body), } ) - if self.path == "/api/datasources/ready": + if self.path == "/api/tools/get_data_sources": body = json.dumps( - [ - { - "id": "repo-1", - "name": "backend", - "type": "Repository", - "url": "https://github.com/CodeAlive-AI/backend", - "state": "Ready", + { + "rendered": "backend\ncore-workspace", + "obj": { + "data_sources": [ + { + "id": "repo-1", + "name": "backend", + "type": "Repository", + "url": "https://github.com/CodeAlive-AI/backend", + "state": "Ready", + }, + { + "id": "ws-1", + "name": "core-workspace", + "type": "Workspace", + "repositoryIds": ["repo-1"], + "state": "Alive", + }, + ] }, - { - "id": "ws-1", - "name": "core-workspace", - "type": "Workspace", - "repositoryIds": ["repo-1"], - "state": "Alive", - }, - ] + } ).encode("utf-8") self.send_response(200) self.send_header("Content-Type", "application/json") @@ -72,7 +80,7 @@ def log_message(self, format, *args): @pytest.mark.asyncio -async def test_stdio_server_lists_tools_and_uses_normalized_ready_endpoint(): +async def test_stdio_server_lists_tools_and_uses_tool_api_v3_endpoint(): server_script = Path(__file__).resolve().parents[1] / "codealive_mcp_server.py" with _mock_codealive_server() as (port, requests): @@ -95,12 +103,15 @@ async def test_stdio_server_lists_tools_and_uses_normalized_ready_endpoint(): tool_names = sorted(tool.name for tool in tools_result.tools) assert tool_names == [ "chat", - "codebase_consultant", - "codebase_search", "fetch_artifacts", + "get_artifact_query_schema", "get_artifact_relationships", "get_data_sources", + "get_file_tree", + "get_repository_ontology", "grep_search", + "query_artifact_metadata", + "read_file", "semantic_search", ] @@ -113,10 +124,11 @@ async def test_stdio_server_lists_tools_and_uses_normalized_ready_endpoint(): assert requests == [ { - "path": "/api/datasources/ready", + "path": "/api/tools/get_data_sources", "authorization": "Bearer stdio-smoke-test-key", "tool": "get_data_sources", "integration": "mcp", - "client": "fastmcp", + "client": "fastmcp-v3", + "body": {"ready_only": True, "output_format": "agentic"}, } ] diff --git a/src/tests/test_tool_api_v3.py b/src/tests/test_tool_api_v3.py new file mode 100644 index 0000000..d3674a1 --- /dev/null +++ b/src/tests/test_tool_api_v3.py @@ -0,0 +1,174 @@ +"""MCP Tool API v3 contract tests.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from fastmcp import Context +from fastmcp.exceptions import ToolError + +from tools.artifact_query import get_artifact_query_schema, query_artifact_metadata +from tools.artifact_relationships import get_artifact_relationships +from tools.chat import chat +from tools.datasources import get_data_sources +from tools.fetch_artifacts import fetch_artifacts +from tools.repository import get_file_tree, get_repository_ontology, read_file +from tools.search import grep_search, semantic_search + + +def _context_with_response(rendered: str = "ok"): + ctx = MagicMock(spec=Context) + ctx.info = AsyncMock() + ctx.warning = AsyncMock() + ctx.error = AsyncMock() + + response = MagicMock() + response.json.return_value = {"rendered": rendered, "obj": {"ok": True}} + response.raise_for_status = MagicMock() + + client = AsyncMock() + client.post.return_value = response + + codealive_context = MagicMock() + codealive_context.client = client + codealive_context.base_url = "https://app.codealive.ai/api/" + + ctx.request_context.lifespan_context = codealive_context + return ctx, client + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("tool_call", "expected_path", "expected_payload"), + [ + ( + lambda ctx: get_data_sources(ctx, query="checkout", ready_only=False), + "/api/tools/get_data_sources", + {"query": "checkout", "ready_only": False}, + ), + ( + lambda ctx: semantic_search( + ctx, + question="How does checkout authorization work?", + data_sources=["backend"], + paths=["src"], + extensions=".cs", + max_results=7, + exclude_markdown=True, + ), + "/api/tools/semantic_search", + { + "question": "How does checkout authorization work?", + "data_sources": ["backend"], + "paths": ["src"], + "extensions": [".cs"], + "max_results": 7, + "exclude_markdown": True, + }, + ), + ( + lambda ctx: grep_search(ctx, query="Authorize", data_sources="backend", regex=True), + "/api/tools/grep_search", + { + "query": "Authorize", + "data_sources": ["backend"], + "exclude_markdown": False, + "regex": True, + }, + ), + ( + lambda ctx: get_repository_ontology(ctx, data_source="backend"), + "/api/tools/get_repository_ontology", + {"data_source": "backend"}, + ), + ( + lambda ctx: get_file_tree(ctx, data_source="backend", path="src", max_depth=2), + "/api/tools/get_file_tree", + {"data_source": "backend", "path": "src", "max_depth": 2}, + ), + ( + lambda ctx: read_file(ctx, path="README.md", data_source="backend", start_line=1, end_line=20), + "/api/tools/read_file", + {"data_source": "backend", "path": "README.md", "start_line": 1, "end_line": 20}, + ), + ( + lambda ctx: fetch_artifacts(ctx, identifiers=["repo::src/Foo.cs::Foo"], data_source="backend"), + "/api/tools/fetch_artifacts", + {"identifiers": ["repo::src/Foo.cs::Foo"], "data_source": "backend"}, + ), + ( + lambda ctx: get_artifact_relationships( + ctx, + identifier="repo::src/Foo.cs::Foo", + profile="AllRelevant", + max_count_per_type=25, + data_source="backend", + ), + "/api/tools/get_artifact_relationships", + { + "identifier": "repo::src/Foo.cs::Foo", + "profile": "AllRelevant", + "max_count_per_type": 25, + "data_source": "backend", + }, + ), + ( + lambda ctx: get_artifact_query_schema(ctx, entity="files", include_examples=False), + "/api/tools/get_artifact_query_schema", + {"entity": "files", "include_examples": False}, + ), + ( + lambda ctx: query_artifact_metadata(ctx, statement="SELECT path FROM files LIMIT 5", data_sources=["backend"]), + "/api/tools/query_artifact_metadata", + {"statement": "SELECT path FROM files LIMIT 5", "data_sources": ["backend"]}, + ), + ( + lambda ctx: chat(ctx, question="Summarize repository startup flow.", data_sources=["backend"]), + "/api/tools/chat", + {"question": "Summarize repository startup flow.", "data_sources": ["backend"]}, + ), + ], +) +@patch("tools.tool_api.get_api_key_from_context") +async def test_mcp_tools_post_canonical_v3_payloads(mock_get_api_key, tool_call, expected_path, expected_payload): + mock_get_api_key.return_value = "test_key" + ctx, client = _context_with_response("done") + + result = await tool_call(ctx) + + assert result == "done" + call_args = client.post.call_args + assert call_args.args[0] == expected_path + assert call_args.kwargs["json"] == {**expected_payload, "output_format": "agentic"} + assert call_args.kwargs["headers"]["Authorization"] == "Bearer test_key" + assert call_args.kwargs["headers"]["X-CodeAlive-Integration"] == "mcp" + assert call_args.kwargs["headers"]["X-CodeAlive-Tool"] == expected_path.rsplit("/", 1)[1] + assert call_args.kwargs["headers"]["X-CodeAlive-Client"] == "fastmcp-v3" + + +@pytest.mark.asyncio +async def test_required_arguments_fail_before_network_call(): + ctx, client = _context_with_response() + + with pytest.raises(ToolError, match="question is required"): + await semantic_search(ctx, question="") + + with pytest.raises(ToolError, match="path is required"): + await read_file(ctx, path="") + + with pytest.raises(ToolError, match="identifiers is required"): + await fetch_artifacts(ctx, identifiers=[]) + + client.post.assert_not_called() + + +@pytest.mark.asyncio +async def test_local_bounds_validation_fail_before_network_call(): + ctx, client = _context_with_response() + + with pytest.raises(ToolError, match="max_results"): + await grep_search(ctx, query="Foo", max_results=501) + + with pytest.raises(ToolError, match="max_count_per_type"): + await get_artifact_relationships(ctx, identifier="repo::Foo", max_count_per_type=0) + + client.post.assert_not_called() diff --git a/src/tests/test_tool_metadata.py b/src/tests/test_tool_metadata.py index 1ec6e13..bf43fd0 100644 --- a/src/tests/test_tool_metadata.py +++ b/src/tests/test_tool_metadata.py @@ -18,13 +18,16 @@ async def test_all_tools_are_marked_read_only_with_titles(): expected_titles = { "chat": "Chat About Codebase", - "codebase_consultant": "Consult Codebase (Deprecated)", "get_data_sources": "List Data Sources", - "codebase_search": "Search Codebase (Deprecated)", "semantic_search": "Semantic Search", "grep_search": "Grep Search", + "get_repository_ontology": "Get Repository Ontology", + "get_file_tree": "Get File Tree", + "read_file": "Read File", "fetch_artifacts": "Fetch Artifacts", "get_artifact_relationships": "Inspect Artifact Relationships", + "get_artifact_query_schema": "Get ArtifactQuery Schema", + "query_artifact_metadata": "Query Artifact Metadata", } actual = {tool.name: tool for tool in tools} @@ -41,5 +44,3 @@ async def test_all_tools_are_marked_read_only_with_titles(): assert "exact artifact identifier" in relationships_description assert "not a search tool" in relationships_description assert "fetch_artifacts" in relationships_description - assert "excludes references" in relationships_description - assert "Mediated or dynamic frameworks" in relationships_description diff --git a/src/tools/__init__.py b/src/tools/__init__.py index 4ec565a..93654c5 100644 --- a/src/tools/__init__.py +++ b/src/tools/__init__.py @@ -1,18 +1,23 @@ """Tool implementations for CodeAlive MCP server.""" -from .artifact_relationships import get_artifact_relationships -from .chat import chat, codebase_consultant from .datasources import get_data_sources +from .search import grep_search, semantic_search +from .repository import get_file_tree, get_repository_ontology, read_file from .fetch_artifacts import fetch_artifacts -from .search import codebase_search, grep_search, semantic_search +from .artifact_relationships import get_artifact_relationships +from .artifact_query import get_artifact_query_schema, query_artifact_metadata +from .chat import chat __all__ = [ - 'chat', - 'codebase_consultant', 'get_data_sources', - 'fetch_artifacts', - 'codebase_search', 'semantic_search', 'grep_search', + 'get_repository_ontology', + 'get_file_tree', + 'read_file', + 'fetch_artifacts', 'get_artifact_relationships', + 'get_artifact_query_schema', + 'query_artifact_metadata', + 'chat', ] diff --git a/src/tools/artifact_query.py b/src/tools/artifact_query.py new file mode 100644 index 0000000..3a1877b --- /dev/null +++ b/src/tools/artifact_query.py @@ -0,0 +1,32 @@ +"""Tool API v3 ArtifactQuery tools.""" + +from typing import Optional, Union + +from fastmcp import Context + +from .tool_api import call_tool_api, normalize_optional_list, require_text + + +async def get_artifact_query_schema( + ctx: Context, + entity: Optional[str] = None, + include_examples: bool = True, +) -> str: + """Return the ArtifactQuery v1 schema, fields, operators, and examples.""" + return await call_tool_api(ctx, "get_artifact_query_schema", { + "entity": entity, + "include_examples": include_examples, + }, action_label="get artifact query schema") + + +async def query_artifact_metadata( + ctx: Context, + statement: str, + data_sources: Optional[Union[str, list[str]]] = None, +) -> str: + """Execute one bounded ArtifactQuery metadata statement.""" + require_text(statement, "query_artifact_metadata", "statement") + return await call_tool_api(ctx, "query_artifact_metadata", { + "statement": statement, + "data_sources": normalize_optional_list(data_sources), + }, action_label="query artifact metadata") diff --git a/src/tools/artifact_relationships.py b/src/tools/artifact_relationships.py index 54ab63f..ab764dd 100644 --- a/src/tools/artifact_relationships.py +++ b/src/tools/artifact_relationships.py @@ -1,391 +1,34 @@ -"""Artifact relationships tool implementation.""" +"""Tool API v3 artifact relationship expansion.""" -from typing import Any, Dict, List, Literal, Optional -from urllib.parse import urljoin +from typing import Literal, Optional -import httpx from fastmcp import Context from fastmcp.exceptions import ToolError -from loguru import logger -from core import CodeAliveContext, get_api_key_from_context, log_api_request, log_api_response -from utils import handle_api_error - -# MCP tool/method name surfaced in every error/log message from this module. -_TOOL_NAME = "get_artifact_relationships" - -# Map MCP profile names to backend enum values -PROFILE_MAP = { - "callsOnly": "CallsOnly", - "inheritanceOnly": "InheritanceOnly", - "allRelevant": "AllRelevant", - "referencesOnly": "ReferencesOnly", -} - -# Backend relationship type to MCP-friendly snake_case -RELATIONSHIP_TYPE_MAP = { - "OutgoingCalls": "outgoing_calls", - "IncomingCalls": "incoming_calls", - "Ancestors": "ancestors", - "Descendants": "descendants", - "References": "references", -} +from .tool_api import call_tool_api, require_text async def get_artifact_relationships( ctx: Context, identifier: str, - profile: Literal["callsOnly", "inheritanceOnly", "allRelevant", "referencesOnly"] = "callsOnly", + profile: Literal["CallsOnly", "InheritanceOnly", "AllRelevant", "ReferencesOnly"] = "CallsOnly", max_count_per_type: int = 50, data_source: Optional[str] = None, -) -> Dict[str, Any]: - """ - Retrieve relationship groups for a single artifact by profile. - - Use this tool to expand the relationship graph around one known artifact: - call graph edges, inheritance hierarchy, or references. - - Important usage rules: - - This is a graph expansion tool, not a search tool. The `identifier` - must be an exact artifact identifier returned by `semantic_search`, - `grep_search`, legacy `codebase_search`, or `fetch_artifacts`. - - Do not pass a repository name, file path, class name, method name, or - guessed symbol name unless it is the full identifier from a prior - tool result. - - If `found=false` or the backend returns a not-found/inaccessible - error, get a fresh identifier with `semantic_search`, `grep_search`, - `codebase_search`, or `fetch_artifacts` before retrying. Repeating - the same guessed identifier usually repeats the same failure. - - Relationships are primarily available for symbol artifacts such as - functions, methods, classes, and interfaces. Plain files and prose - documents can legitimately have no relationship graph. - - The response contains relationship metadata and short summaries, not - full source code. Use `fetch_artifacts` on returned identifiers when - exact source content is needed. - - Choose `profile` by artifact shape: `callsOnly` for function/method - callers and callees; `inheritanceOnly` for hierarchy; `allRelevant` - for calls plus inheritance only (it excludes references); - `referencesOnly` for where-used checks on types, containers, fields, - commands, events, interfaces, and other non-call usage. - - Mediated or dynamic frameworks such as command buses, event buses, - dependency injection, reflection, route binding, subscriptions, - schedulers, or generated dispatch may not expose a direct call edge. - When graph context is missing or insufficient, use targeted - `grep_search` for construction, registration, dispatch, route, - subscription, or scheduler text surfaced by source you've already read. - - If any relationship group has `truncated=true`, increase - `max_count_per_type` up to 1000 or narrow the investigation with a - more specific `profile`. - - Args: - identifier: Fully qualified artifact identifier from search or fetch results. - profile: Relationship profile to expand. One of: - - "callsOnly" (default): outgoing and incoming calls - - "inheritanceOnly": ancestors, descendants, implementations, and derived types - - "allRelevant": calls + inheritance only; references are excluded - - "referencesOnly": where-used LSP references for non-call usage - max_count_per_type: Maximum related artifacts per relationship type (1–1000, default 50). - data_source: Optional data-source Name or Id used to disambiguate an identifier that - exists in more than one data source. Copy the `dataSource.name` or - `dataSource.id` from a search result. Omit it for normal lookups; if the - source identifier is ambiguous and you omit it, the backend returns a 409 - listing the candidate data sources. - - Returns: - A dict with grouped relationships: - {"sourceIdentifier":"...","profile":"callsOnly","found":true, - "relationships":[ - {"type":"outgoing_calls","totalCount":57,"returnedCount":50,"truncated":true, - "items":[{"identifier":"...","filePath":"src/Data/Repo.cs","startLine":88, - "shortSummary":"Stores data"}]}, - {"type":"incoming_calls","totalCount":3,"returnedCount":3,"truncated":false, - "items":[{"identifier":"...","filePath":"src/Services/Worker.cs","startLine":142}]} - ]} +) -> str: + """Expand relationships around one exact artifact identifier. - When the artifact is not found or inaccessible: - {"sourceIdentifier":"...","profile":"callsOnly","found":false} + This is a graph expansion tool, not a search tool. Use identifiers returned + by semantic_search, grep_search, fetch_artifacts, read_file, or prior + relationship results. """ - tool_arguments = { + tool_name = "get_artifact_relationships" + require_text(identifier, tool_name, "identifier") + if not (1 <= max_count_per_type <= 1000): + raise ToolError(f"[{tool_name}] max_count_per_type must be between 1 and 1000.") + + return await call_tool_api(ctx, tool_name, { "identifier": identifier, "profile": profile, "max_count_per_type": max_count_per_type, "data_source": data_source, - } - - # Normalize the optional selector: treat empty/whitespace-only as "no selector" - # so we don't send a junk dataSource to the backend or echo it in the not-found hint. - # (tool_arguments above intentionally keeps the raw value for exact-invocation logging.) - if data_source is not None: - data_source = data_source.strip() or None - - if not identifier: - logger.bind(tool=_TOOL_NAME, tool_arguments=tool_arguments).warning( - "Tool validation failed: artifact identifier is required" - ) - raise ToolError(f"[{_TOOL_NAME}] Artifact identifier is required.") - - if not (1 <= max_count_per_type <= 1000): - logger.bind(tool=_TOOL_NAME, tool_arguments=tool_arguments).warning( - "Tool validation failed: max_count_per_type is out of range" - ) - raise ToolError(f"[{_TOOL_NAME}] max_count_per_type must be between 1 and 1000.") - - # Literal type handles most validation via Pydantic, but direct callers - # (e.g. unit tests) can still pass invalid values — keep as fallback. - api_profile = PROFILE_MAP.get(profile) - if api_profile is None: - supported = ", ".join(PROFILE_MAP.keys()) - logger.bind(tool=_TOOL_NAME, tool_arguments=tool_arguments).warning( - "Tool validation failed: unsupported relationship profile" - ) - raise ToolError(f'[{_TOOL_NAME}] Unsupported profile "{profile}". Use one of: {supported}') - - context: CodeAliveContext = ctx.request_context.lifespan_context - - try: - api_key = get_api_key_from_context(ctx) - headers = { - "Authorization": f"Bearer {api_key}", - "X-CodeAlive-Integration": "mcp", - "X-CodeAlive-Tool": "get_artifact_relationships", - "X-CodeAlive-Client": "fastmcp", - } - - body = { - "identifier": identifier, - "profile": api_profile, - "maxCountPerType": max_count_per_type, - } - if data_source: - body["dataSource"] = data_source - - await ctx.debug(f"Fetching {profile} relationships for artifact") - - full_url = urljoin(context.base_url, "/api/search/artifact-relationships") - request_id = log_api_request("POST", full_url, headers, body=body) - - response = await context.client.post( - "/api/search/artifact-relationships", json=body, headers=headers - ) - - log_api_response(response, request_id) - response.raise_for_status() - - return _build_relationships_dict(response.json(), data_source=data_source) - - except (httpx.HTTPStatusError, Exception) as e: - logger.bind( - tool=_TOOL_NAME, - tool_arguments=tool_arguments, - error_type=type(e).__name__, - error=str(e), - ).warning("Tool call failed while fetching artifact relationships") - await handle_api_error( - ctx, e, "get artifact relationships", method=_TOOL_NAME, - recovery_hints={ - 404: ( - "(1) verify the identifier came from a recent semantic_search, grep_search, codebase_search, or fetch_artifacts result, " - "(2) call semantic_search or grep_search again to get a fresh identifier — the index may have changed, " - "(3) check that the artifact is a function/class (relationships are not available for non-symbol artifacts)" - ), - 409: ( - "(1) the identifier exists in more than one data source — see the candidate data sources in the Detail above; each one will resolve, " - "(2) retry get_artifact_relationships with data_source set to one candidate's Name or Id; if that data source isn't the one you want, retry with the next candidate, " - "(3) do NOT invent relation results — pick from the listed data sources" - ), - }, - ) - - -def _build_relationships_dict(data: dict, data_source: Optional[str] = None) -> Dict[str, Any]: - """Build a dict representation of an artifact relationships response. - - FastMCP serializes the dict via pydantic_core.to_json, which preserves UTF-8 — - don't reintroduce json.dumps here, it would re-escape non-ASCII identifiers. - - ``data_source`` is the selector the caller passed (if any); when the source is not - found it shapes the recovery hint so the agent can retry with another data source - or drop the selector. - """ - raw_source_id = data.get("sourceIdentifier") or "" - raw_profile = data.get("profile") or "" - found = bool(data.get("found", False)) - - # Map profile back to MCP-friendly name - mcp_profile = raw_profile - for mcp_name, api_name in PROFILE_MAP.items(): - if api_name == raw_profile: - mcp_profile = mcp_name - break - - payload: Dict[str, Any] = { - "sourceIdentifier": raw_source_id, - "profile": mcp_profile, - "found": found, - } - - if found: - relationships = data.get("relationships") or [] - groups = [_build_group(group) for group in relationships] - payload["relationships"] = groups - - counts = _build_counts(data.get("availableRelationshipCounts")) - if counts is not None: - payload["availableRelationshipCounts"] = counts - payload["hint"] = _build_relationship_hint(found, mcp_profile, groups, counts, data_source) - else: - payload["hint"] = _build_relationship_hint(found, mcp_profile, [], None, data_source) - - return payload - - -def _build_group(group: dict) -> Dict[str, Any]: - """Build the JSON representation of a single relationship group.""" - relationship_type = group.get("relationType", "") - mcp_type = RELATIONSHIP_TYPE_MAP.get(relationship_type, relationship_type.lower()) - - items: List[Dict[str, Any]] = [] - for item in group.get("items", []) or []: - item_dict: Dict[str, Any] = {"identifier": item.get("identifier") or ""} - - file_path = item.get("filePath") - if file_path is not None: - item_dict["filePath"] = file_path - - start_line = item.get("startLine") - if start_line is not None: - item_dict["startLine"] = start_line - - short_summary = item.get("shortSummary") - if short_summary is not None: - item_dict["shortSummary"] = short_summary - - items.append(item_dict) - - return { - "type": mcp_type, - "totalCount": group.get("totalCount") or 0, - "returnedCount": group.get("returnedCount") or 0, - "truncated": bool(group.get("truncated")), - "items": items, - } - - -def _build_counts(counts: Any) -> Dict[str, int] | None: - """Preserve backend relationship counts that guide profile recovery.""" - if not isinstance(counts, dict): - return None - - return { - "outgoingCalls": int(counts.get("outgoingCalls") or counts.get("OutgoingCalls") or 0), - "incomingCalls": int(counts.get("incomingCalls") or counts.get("IncomingCalls") or 0), - "ancestors": int(counts.get("ancestors") or counts.get("Ancestors") or 0), - "descendants": int(counts.get("descendants") or counts.get("Descendants") or 0), - "references": int(counts.get("references") or counts.get("References") or 0), - } - - -def _build_relationship_hint( - found: bool, - profile: str, - groups: List[Dict[str, Any]], - counts: Dict[str, int] | None, - data_source: Optional[str] = None, -) -> str: - """Give model-facing next-step guidance for graph traversal results.""" - if not found: - if data_source: - return ( - f'No relationship data was found for this identifier in data source "{data_source}". ' - "The identifier may belong to a different data source, or the data_source value may be " - "wrong. Try: re-run with data_source set to a different candidate (use the `dataSource` " - "name or id from your search results, or call get_data_sources), or omit data_source " - "entirely — if the identifier is ambiguous you then get a 409 listing the candidate data " - "sources. Otherwise re-run semantic_search or grep_search to get a fresh identifier." - ) - return ( - "No relationship data was found for this identifier. Verify that the identifier came from " - "a recent search/fetch result and points to a symbol-level artifact; otherwise re-run " - "semantic_search or grep_search to get a fresh identifier." - ) - - if any(group["truncated"] for group in groups): - return ( - "Some relationship groups are truncated. If the user asked for all usages or full graph " - "scope, call get_artifact_relationships again with a higher max_count_per_type, then " - "fetch promising related artifacts before making broad claims." - ) - - if all(group["totalCount"] == 0 for group in groups): - return _build_empty_profile_hint(profile, counts) - - return ( - "Fetch promising related artifacts before making claims about behavior, concrete applications, " - "or how broadly this mechanism is used." - ) - - -def _build_empty_profile_hint(profile: str, counts: Dict[str, int] | None) -> str: - has_calls = (counts or {}).get("outgoingCalls", 0) > 0 or (counts or {}).get("incomingCalls", 0) > 0 - has_inheritance = (counts or {}).get("ancestors", 0) > 0 or (counts or {}).get("descendants", 0) > 0 - has_references = (counts or {}).get("references", 0) > 0 - - if profile == "referencesOnly" and has_calls and has_inheritance: - return ( - "No references were found for this profile, but call and inheritance relationships exist. " - "Use callsOnly for function/method callers or callees, or inheritanceOnly for base classes, " - "interfaces, overrides, implementations, or derived types." - ) - if profile == "referencesOnly" and has_calls: - return ( - "No references were found for this profile, but call relationships exist. Use callsOnly " - "for function/method callers or callees. Use referencesOnly for where-used checks on " - "types, containers, fields, commands, events, interfaces, and other non-call usage." - ) - if profile == "referencesOnly" and has_inheritance: - return ( - "No references were found for this profile, but inheritance relationships exist. Use " - "inheritanceOnly for base classes, interfaces, overrides, implementations, or derived types." - ) - if profile == "callsOnly" and has_references and has_inheritance: - return ( - "No call relationships were found for this profile, but references and inheritance " - "relationships exist. Try referencesOnly for where-used checks or inheritanceOnly for hierarchy." - ) - if profile == "callsOnly" and has_references: - return ( - "No call relationships were found for this profile, but references exist. Use referencesOnly " - "for where-used checks on types, containers, fields, commands, events, interfaces, or mediated dispatch symbols." - ) - if profile == "callsOnly" and has_inheritance: - return ( - "No call relationships were found for this profile, but inheritance relationships exist. " - "Use inheritanceOnly for base classes, interfaces, overrides, implementations, or derived types." - ) - if profile == "allRelevant" and has_references: - return ( - "No calls or inheritance relationships were found for allRelevant. allRelevant excludes " - "references by design; use referencesOnly for where-used checks." - ) - if profile == "inheritanceOnly" and has_calls and has_references: - return ( - "No inheritance relationships were found for this profile. Use callsOnly for function " - "callers/callees, or referencesOnly for where-used checks on types, commands, events, fields, containers, or interfaces." - ) - if profile == "inheritanceOnly" and has_calls: - return ( - "No inheritance relationships were found for this profile, but call relationships exist. " - "Use callsOnly for function/method callers or callees." - ) - if profile == "inheritanceOnly" and has_references: - return ( - "No inheritance relationships were found for this profile, but references exist. Use " - "referencesOnly for where-used checks on types, containers, fields, commands, events, interfaces, or mediated dispatch symbols." - ) - - return ( - "No relationships were found for this profile. Empty profile results do not mean the artifact " - "has no graph data. Use callsOnly for function/method callers and callees, inheritanceOnly for " - "hierarchy, allRelevant for calls plus inheritance, and referencesOnly for where-used checks on " - "types, containers, fields, commands, events, interfaces, and other non-call usage." - ) + }, action_label="get artifact relationships") diff --git a/src/tools/chat.py b/src/tools/chat.py index 804ab25..ca42949 100644 --- a/src/tools/chat.py +++ b/src/tools/chat.py @@ -1,338 +1,26 @@ -"""Chat completions tool implementation. +"""Tool API v3 stateless chat.""" -The canonical MCP tool name is ``chat``. ``codebase_consultant`` remains as a -deprecated alias for backward compatibility. -""" +from typing import Optional, Union -import json -import re -from typing import Dict, List, Optional, Union -from urllib.parse import urljoin - -import httpx from fastmcp import Context -from fastmcp.exceptions import ToolError - -from core import CodeAliveContext, get_api_key_from_context, log_api_request, log_api_response -from utils import handle_api_error, format_validation_error, format_data_source_names, normalize_data_source_names -_PRIMARY_TOOL_NAME = "chat" -_LEGACY_TOOL_NAME = "codebase_consultant" -_OBJECT_ID_RE = re.compile(r"^[0-9a-fA-F]{24}$") +from .tool_api import call_tool_api, normalize_optional_list, require_text async def chat( ctx: Context, question: str, - data_sources: Optional[Union[str, List[str]]] = None, - conversation_id: Optional[str] = None, -) -> str: - """ - Ask CodeAlive for a synthesized answer about the indexed codebase. - - **IMPORTANT: Do NOT call this tool unless the user explicitly asks for it** - (e.g. "use chat", "use codebase_consultant", "call the chat tool"). - For all other tasks — finding code, understanding architecture, debugging — - use `semantic_search`, `grep_search`, `fetch_artifacts`, and - `get_artifact_relationships` instead. These tools are faster, return - primary evidence, and give you full control over the workflow. - - `chat` is a slow synthesis fallback (up to 30 seconds) with lower evidence - fidelity. It exists for cases where the user wants a single opinionated - answer from CodeAlive rather than raw search results. - - **PREREQUISITE**: You MUST call `get_data_sources` FIRST to discover available data source names, - UNLESS the user has explicitly provided specific data source names OR you are continuing an - existing conversation with a `conversation_id`. - - When invoked by the user, this tool can produce synthesized answers about - architecture, design decisions, code walkthroughs, debugging, etc. - These topics do NOT by themselves justify calling the tool — only an - explicit user request does. - - Args: - question: What you want to know about the codebase - Example: "How does the authentication system work?" - - data_sources: Repository or workspace names to analyze. These names are - resolved to IDs on the server side. - Example: ["enterprise-platform", "workspace:payments-team"] - - conversation_id: Continue a previous consultation session. Must be the - 24-character hex Mongo ObjectId returned by a previous - response. - Example: "69fceb3e7b2a6a7efdd18180" - - Returns: - Synthesized analysis and explanation addressing your question. - - Examples: - 1. Ask about architecture: - chat( - question="What's the best way to add caching to our API?", - data_sources=["repo123"] - ) - - 2. Understand implementation: - chat( - question="How do the microservices communicate?", - data_sources=["platform", "payments"] - ) - - 3. Continue a consultation: - chat( - question="What about error handling in that flow?", - conversation_id="69fceb3e7b2a6a7efdd18180" - ) - - Note: - - `chat` is disabled by default; see top of docstring for the single - condition that permits calling it. - - Either conversation_id OR data_sources is typically provided - - When creating a new conversation, data_sources is optional if your API key has exactly one assigned data source - - When continuing a conversation, conversation_id is required to maintain context - - The tool maintains full conversation history for follow-up questions - - Choose workspace names for broad architectural questions or repository names for specific implementation details - """ - return await _chat_impl( - ctx, - question=question, - data_sources=data_sources, - conversation_id=conversation_id, - method_name=_PRIMARY_TOOL_NAME, - ) - - -async def codebase_consultant( - ctx: Context, - question: str, - data_sources: Optional[Union[str, List[str]]] = None, - conversation_id: Optional[str] = None, + data_sources: Optional[Union[str, list[str]]] = None, ) -> str: - """Deprecated alias for `chat`. - - Keep this for backward compatibility with older prompts and MCP clients. - New integrations should prefer the canonical `chat` tool name. + """Ask stateless CodeAlive chat through Tool API v3. - **Same invocation policy as `chat`**: do NOT call unless the user explicitly - named the tool (e.g. "use codebase_consultant", "use chat", "call the chat tool"). - For all other tasks use `semantic_search`, `grep_search`, `fetch_artifacts`, - and `get_artifact_relationships`. + Call only when the user explicitly asks for chat/synthesis. Tool API v3 + chat does not preserve public conversation context across calls; include all + important prior findings, artifact identifiers, assumptions, scope, and + constraints in each `question`. """ - return await _chat_impl( - ctx, - question=question, - data_sources=data_sources, - conversation_id=conversation_id, - method_name=_LEGACY_TOOL_NAME, - ) - - -async def _chat_impl( - ctx: Context, - *, - question: str, - data_sources: Optional[Union[str, List[str]]], - conversation_id: Optional[str], - method_name: str, -) -> str: - context: CodeAliveContext = ctx.request_context.lifespan_context - - # Normalize data source names (handles Claude Desktop serialization issues) - data_sources = normalize_data_source_names(data_sources) - - if not question or not question.strip(): - raise ToolError(format_validation_error( - method_name, - "No question provided. Please provide a question to ask the chat tool.", - )) - - if conversation_id and not _OBJECT_ID_RE.match(conversation_id): - raise ToolError(format_validation_error( - method_name, - f"conversation_id {conversation_id!r} is not a 24-character hex Mongo ObjectId. " - "Retry: no — fix the input. Try: pass the Conversation ID returned by an earlier " - "successful chat response, or omit conversation_id to start a new conversation.", - )) - - # Validate that either conversation_id or data_sources is provided - if not conversation_id and (not data_sources or len(data_sources) == 0): - await ctx.info("No data sources provided. If the API key has exactly one assigned data source, that will be used as default.") - await ctx.info( - f"[{method_name}] This synthesized call can take up to 30 seconds. " - "Prefer semantic_search and grep_search for default discovery." - ) - - # Transform simple question into message format internally - messages = [{"role": "user", "content": question}] - - # Prepare the request payload - request_data = { - "messages": messages, - "stream": True # Always stream internally for efficiency - } - - if conversation_id: - request_data["conversationId"] = conversation_id - - if data_sources: - request_data["names"] = format_data_source_names(data_sources) - - try: - api_key = get_api_key_from_context(ctx) - - # Log the attempt - await ctx.info(f"Consulting about: '{question[:100]}...'" if len(question) > 100 else f"Consulting about: '{question}'" + - (f" (continuing conversation {conversation_id})" if conversation_id else "")) - - headers = { - "Authorization": f"Bearer {api_key}", - "Accept": "text/event-stream, application/problem+json", - "X-CodeAlive-Integration": "mcp", - "X-CodeAlive-Tool": method_name, - "X-CodeAlive-Client": "fastmcp", - } - - # Log the request - full_url = urljoin(context.base_url, "/api/chat/completions") - request_id = log_api_request("POST", full_url, headers, body=request_data) - - # Make API request - response = await context.client.post( - "/api/chat/completions", - json=request_data, - headers=headers - ) - - # Log the response - log_api_response(response, request_id) - - response.raise_for_status() - - # Process streaming response - we always stream internally for efficiency - full_response = "" - conversation_metadata = {} - current_event_name = "message" - - try: - async for line in response.aiter_lines(): - line = line.rstrip("\r") - if not line: - current_event_name = "message" - continue - - # Handle metadata events - if line.startswith("event:"): - current_event_name = line[len("event:"):].strip() or "message" - continue - - if line.startswith("data:"): - data = line[len("data:"):].lstrip() - if data == "[DONE]": - break - try: - chunk = json.loads(data) - - if current_event_name == "error" and chunk.get("event") is None: - raise ToolError(_format_stream_error(method_name, chunk, conversation_metadata)) - - if chunk.get("event") == "error": - raise ToolError(_format_stream_error(method_name, chunk, conversation_metadata)) - - # Capture metadata with conversation ID and message ID - if chunk.get("event") == "metadata": - conv_id = chunk.get("conversationId") - msg_id = chunk.get("messageId") - if conv_id or msg_id: - conversation_metadata = chunk - await ctx.info(f"Conversation ID: {conv_id}, Message ID: {msg_id}") - continue - - # Process content chunks - if "choices" in chunk and len(chunk["choices"]) > 0: - delta = chunk["choices"][0].get("delta", {}) - if delta and "content" in delta and delta["content"] is not None: - full_response += delta["content"] - except json.JSONDecodeError: - pass - except ToolError: - raise - except Exception as streaming_error: - # Include conversation and message IDs in streaming error response - error_context = _format_metadata_context(conversation_metadata) - error_msg = ( - f"[{method_name}] Error during streaming: {str(streaming_error)}" - ) - await ctx.error(error_msg) - raise ToolError(f"{error_msg} {error_context}") - - # Append conversation ID info to the response if we got one and it's a new conversation - if conversation_metadata.get("conversationId") and not conversation_id: - conversation_id_note = f"\n\n---\n**Conversation ID:** `{conversation_metadata['conversationId']}`\n*Use this ID in the `conversation_id` parameter to continue this conversation.*" - full_response += conversation_id_note - - return full_response or "No content returned from the API. Please check that your data sources are accessible and try again." - - except ToolError: - raise - except (httpx.HTTPStatusError, Exception) as e: - await handle_api_error( - ctx, e, "chat completion", method=method_name, - recovery_hints={ - 404: ( - "(1) if continuing a conversation, verify conversation_id matches one returned by an earlier call, " - "(2) if starting a new conversation, call get_data_sources to list valid data source names, " - "(3) drop conversation_id and data_sources to fall back to the API key's default" - ), - }, - ) - - -def _format_metadata_context(metadata: Dict) -> str: - """Format conversation metadata for error messages.""" - if not metadata: - return "" - - parts = [] - if metadata.get("conversationId"): - parts.append(f"Conversation ID: {metadata['conversationId']}") - if metadata.get("messageId"): - parts.append(f"Message ID: {metadata['messageId']}") - - if parts: - return f"\n\n---\n**Debug Info:**\n" + "\n".join(f"- {p}" for p in parts) - return "" - - -def _format_stream_error(method_name: str, payload: Dict, metadata: Dict) -> str: - """Format an in-stream SSE error frame into an agent-actionable ToolError.""" - status = payload.get("status") - code = str(status or payload.get("code") or "STREAM_ERROR") - message = payload.get("message") or payload.get("detail") or payload.get("title") or "Streaming error" - detail = payload.get("detail") or payload.get("details") - request_id = payload.get("requestId") or payload.get("traceId") - - try: - status_int = int(status) if status is not None else None - except (TypeError, ValueError): - status_int = None - - if status_int in {408, 425, 429}: - retry = "Retry: yes (back off before retrying)" - hint = "Try: (1) wait 30-60 seconds before retrying, (2) reduce request frequency if this repeats" - elif status_int is not None and status_int >= 500: - retry = "Retry: yes (retry once after a few seconds)" - hint = "Try: (1) retry the call once, (2) if it fails again, stop retrying and report the requestId" - else: - retry = "Retry: no — fix the input or credentials, do not loop" - hint = "Try: inspect the detail/requestId below and adjust the chat request before retrying" - - extras = [] - if detail and detail != message: - extras.append(f"Detail: {detail}") - if request_id: - extras.append(f"requestId={request_id}") - metadata_context = _format_metadata_context(metadata) - suffix = f" ({' | '.join(extras)})" if extras else "" - - return f"[{method_name}] Error: {message}. Code: {code}. {retry}. {hint}{suffix}{metadata_context}" + require_text(question, "chat", "question") + return await call_tool_api(ctx, "chat", { + "question": question, + "data_sources": normalize_optional_list(data_sources), + }, action_label="chat") diff --git a/src/tools/datasources.py b/src/tools/datasources.py index 672bf7a..b21c9ae 100644 --- a/src/tools/datasources.py +++ b/src/tools/datasources.py @@ -1,235 +1,23 @@ -"""Data sources tool implementation.""" +"""Tool API v3 data-source discovery.""" -from typing import Any, Dict -from urllib.parse import urljoin +from typing import Optional -import httpx from fastmcp import Context -from core import ( - CodeAliveContext, - get_api_key_from_context, - log_api_request, - log_api_response, -) -from utils import handle_api_error +from .tool_api import call_tool_api -# MCP tool/method name surfaced in every error/log message from this module. -_TOOL_NAME = "get_data_sources" -# Pre-filter scoped candidate count, emitted by the backend only on relevance-filtered requests. -_TOTAL_HEADER = "X-CodeAlive-Total-Data-Sources" - - -def _relevance_message(data_sources: list, response) -> str: - """Builds the hint accompanying a query'd (relevance-filtered) result. - - The backend guarantees every relevance-selected item carries a non-empty `relevanceReason`, - so a query'd response where NO item has one means the filter did not run (fail-open on error, - disabled by config, or an older backend ignoring `query`) and the FULL list was returned — - the model must be told, instead of mistaking the full dump for a relevant shortlist. - """ - filtered = any(ds.get("relevanceReason") for ds in data_sources) - if not filtered: - return ( - "Relevance filtering was unavailable for this request (it may have failed or be " - "disabled), so the FULL unfiltered list of data sources is returned." - ) - - shown = len(data_sources) - try: - total = int(response.headers.get(_TOTAL_HEADER)) - except (TypeError, ValueError): - # Header absent (TypeError on int(None)) or malformed (ValueError). - total = None - if total is not None and total > shown: - return ( - f"{shown} of {total} available data sources are relevant to this query; the other " - f"{total - shown} were omitted. Call get_data_sources without a query to get the full list." - ) - if total is not None: - return f"All {total} available data sources are relevant to this query." - return ( - "Only the data sources relevant to this query are shown; non-relevant sources were " - "omitted. Call get_data_sources without a query to get the full list." - ) - - -# Hint embedded in every successful response. Mirrors the convention used by -# the search tools (see _SEARCH_HINT in utils/response_transformer.py): the -# response is always in front of the model when it picks the next step, so we -# repeat the most load-bearing usage rule here instead of relying on the -# tool's docstring being re-read mid-conversation. -_DATASOURCES_HINT = ( - "Use the `name` field as the `data_sources` parameter for `semantic_search`, " - "`grep_search`, or `chat`. To identify the CURRENT repository (vs external), " - "compare `name`/`description`/`url` against your working directory and the " - "code you've already observed." -) - -_DATASOURCES_EMPTY_HINT = ( - "No data sources found. Add a repository or workspace to CodeAlive at " - "https://app.codealive.ai before calling search or chat tools. If you " - "expected sources here, retry with alive_only=false to surface ones still " - "being indexed." -) - -# Empty result WITH a query means "nothing relevant to this intent" (sources DO exist) — -# a distinct hint from the no-sources-at-all case, so the model doesn't tell the user -# to add a repository. -_DATASOURCES_EMPTY_QUERY_HINT = ( - "No data sources are relevant to this query. Try a broader query, or call " - "get_data_sources without a query to see the full list." -) - - -# alive_only refers to ready_only. leaved as is for backward compatibility. async def get_data_sources( - ctx: Context, alive_only: bool = True, query: str | None = None -) -> Dict[str, Any]: + ctx: Context, + query: Optional[str] = None, + ready_only: bool = True, +) -> str: + """List visible repositories and workspaces. + + Use the returned `name` value for `data_sources` or `data_source` in other + v3 tools unless automation needs a stable `id`. """ - **CALL THIS FIRST**: Gets all available data sources (repositories and workspaces) for the user's account. - - This tool MUST be called BEFORE using `semantic_search`, `grep_search`, or - `chat` to discover available data source names, UNLESS the user - has explicitly provided data source names. - - A data source is a code repository or workspace that has been indexed by CodeAlive - and can be used for code search and chat completions. - - Args: - alive_only: If True (default), returns only data sources that are fully processed and ready for use. - If False, returns all data sources regardless of processing state. - query: Optional. The user's initial intent/task in natural language (e.g. "add OAuth to - checkout"). When provided, the backend runs an agentic relevance filter and returns - ONLY the data sources relevant to that intent, each with a `relevanceReason` - explaining why. This is the user's GOAL — distinct from `searchTerm` (a substring - name filter). Omit it to get the full list. Pass it whenever you - know what the user is trying to accomplish, to keep the returned list focused. - - Returns: - {"dataSources": [...], "hint": "..."} - - With `query`, the object also carries a `message` field telling you whether sources - were omitted as non-relevant (and how many of the total), that every available source - was relevant, or that relevance filtering was unavailable and the FULL list is returned. - - Each entry in `dataSources` carries: - - id: Unique identifier for the data source - - name: Human-readable name - CRITICAL for matching with current working directory name - - description: Summary of codebase contents - CRITICAL for identifying if this matches your - current working codebase (compare tech stack, architecture, features you've observed) - - type: The type of data source ("Repository" or "Workspace") - - url: Repository URL (for Repository type only) - useful for matching with git remote - - state: The processing state of the data source (if alive_only=false) - - relevanceReason: Why this source is relevant to `query` (present ONLY when `query` was supplied) - - The `hint` field reminds you how to use the result and how to distinguish - the CURRENT repository from EXTERNAL ones. - - Use name + description + url together to determine if a repository is the CURRENT one - you're working in versus an EXTERNAL repository. - - Examples: - 1. Get only ready-to-use data sources: - get_data_sources() - - 2. Get all data sources including those still processing: - get_data_sources(alive_only=false) - - Note: - Ready data sources are fully processed and available for search and chat. - Other states include "New" (just added), "Processing" (being indexed), - "Failed" (indexing failed), etc. - - CRITICAL - Use ALL available information to identify CURRENT vs EXTERNAL repositories: - - Heuristic signals to combine (in order of reliability): - 1. **Name matching**: Does repo name match your current working directory name? - Example: In "/Users/bob/my-app" and repo name is "my-app" → CURRENT - - 2. **Description matching**: Does description match what you've observed in the codebase? - - Tech stack (Python, JavaScript, FastAPI, React, etc.) - - Architecture patterns (microservices, monolith, MCP server, etc.) - - Key features mentioned - Example: Description says "FastAPI MCP server" and you see FastAPI + MCP code → CURRENT - - 3. **User context**: What is the user asking about? - - "this repo", "our code", "my project" → CURRENT - - "the payments service", "external API" → EXTERNAL - - 4. **URL matching** (when available): Compare with git remote URL - Note: May have format differences (SSH vs HTTPS), but hostname + path should match - - 5. **Working history**: Have you been reading/editing files that align with this repo? - - Use the returned data source names with `semantic_search`, `grep_search`, - `codebase_search` (legacy), `chat`, and `codebase_consultant` (legacy). - """ - context: CodeAliveContext = ctx.request_context.lifespan_context - - try: - api_key = get_api_key_from_context(ctx) - - # Determine the endpoint based on ready_only flag - endpoint = "/api/datasources/ready" if alive_only else "/api/datasources/all" - - headers = { - "Authorization": f"Bearer {api_key}", - "X-CodeAlive-Integration": "mcp", - "X-CodeAlive-Tool": "get_data_sources", - "X-CodeAlive-Client": "fastmcp", - } - - # Thread the user's intent as the `query` param when present so the backend relevance - # filter runs. Omitted entirely otherwise, so the request is unchanged for legacy callers - # (and an older backend that ignores `query` simply returns the full list). - params = {"query": query} if query else None - - # Log the request - full_url = urljoin(context.base_url, endpoint) - request_id = log_api_request("GET", full_url, headers) - - # Make API request - response = await context.client.get(endpoint, headers=headers, params=params) - - # Log the response - log_api_response(response, request_id) - - response.raise_for_status() - - # Parse and format the response - data_sources = response.json() - - if not data_sources or len(data_sources) == 0: - hint = _DATASOURCES_EMPTY_QUERY_HINT if query else _DATASOURCES_EMPTY_HINT - return {"dataSources": [], "hint": hint} - - # Remove repositoryIds from workspace data sources - for data_source in data_sources: - if ( - data_source.get("type") == "Workspace" - and "repositoryIds" in data_source - ): - del data_source["repositoryIds"] - - # FastMCP serializes via pydantic_core.to_json, which preserves UTF-8. - result: Dict[str, Any] = {"dataSources": data_sources, "hint": _DATASOURCES_HINT} - if query: - result["message"] = _relevance_message(data_sources, response) - return result - - except (httpx.HTTPStatusError, Exception) as e: - await handle_api_error( - ctx, - e, - "retrieving data sources", - method=_TOOL_NAME, - recovery_hints={ - # 422 means *some* sources are still indexing — surface alive_only=false as the next step - 422: ( - "(1) call get_data_sources(alive_only=false) to see which sources are still being processed, " - "(2) wait a few minutes for indexing to complete and retry" - ), - }, - ) + return await call_tool_api(ctx, "get_data_sources", { + "query": query, + "ready_only": ready_only, + }, action_label="list data sources") diff --git a/src/tools/fetch_artifacts.py b/src/tools/fetch_artifacts.py index ef8f28e..087fdf9 100644 --- a/src/tools/fetch_artifacts.py +++ b/src/tools/fetch_artifacts.py @@ -1,358 +1,26 @@ -"""Fetch artifacts tool implementation.""" +"""Tool API v3 artifact fetch.""" -from typing import List, Optional, Union -from urllib.parse import urljoin +from typing import Optional, Union -import httpx from fastmcp import Context from fastmcp.exceptions import ToolError -from core import CodeAliveContext, get_api_key_from_context, log_api_request, log_api_response -from utils import coerce_stringified_list, handle_api_error - -# MCP tool/method name surfaced in every error/log message from this module. -_TOOL_NAME = "fetch_artifacts" - -# Emitted alongside a block so the agent never silently drops a requested -# artifact. Lists the concrete missing identifiers (in the block) and tells the agent to -# re-check those ids and retry the problematic ones. Parallel to search.py's _SEARCH_EMPTY_HINT. -_NOT_FOUND_HINT = ( - "{count} requested identifier(s) returned no accessible artifact and are listed under " - " above. Do NOT silently omit them from your answer. A entry means " - "the identifier did not resolve, or points outside the data sources this key can read — it " - "is NOT proof the code is absent. Required next steps: (1) re-check those exact identifiers " - "for typos or staleness; (2) re-run semantic_search or grep_search to obtain fresh, valid " - "identifiers, then call fetch_artifacts again for those problematic ids; (3) if they still " - "cannot be retrieved, explicitly tell the user which artifacts could not be fetched — do not " - "pretend they don't exist." -) +from .tool_api import call_tool_api, normalize_optional_list async def fetch_artifacts( ctx: Context, - identifiers: Union[str, List[str]], + identifiers: Union[str, list[str]], data_source: Optional[str] = None, ) -> str: - """ - Retrieve the full content of code artifacts by their identifiers. - - Use this tool AFTER `semantic_search`, `grep_search`, or legacy `codebase_search` - to get the complete source code for results you need to inspect. The `identifier` - values come from the search results. - - This is the recommended way to retrieve content for **external repositories** that - you cannot access via local file reads. For repositories in your working directory, - prefer using `Read()` on the local files instead. - - Args: - identifiers: List of artifact identifiers from search results (max 20). - These are the `identifier` attribute values from `semantic_search`, - `grep_search`, or legacy `codebase_search` results. - - Identifier format examples: - Symbol: "my-org/backend::src/services/auth.py::AuthService.validate_token(token: str)" - File: "my-org/backend::src/services/auth.py" - Chunk: "my-org/backend::README.md::0042" - - data_source: Optional data-source Name or Id used to disambiguate an identifier that - exists in more than one data source. Copy the `dataSource.name` or - `dataSource.id` from the search result you want. Omit it for normal lookups; - if an identifier is ambiguous and you omit it, the backend returns a 409 - listing the candidate data sources. - - Returns: - XML with full content and call relationships for each found artifact: - - - numbered source code - - - - - - - - - - - - Requested identifiers the backend could not resolve (or that are outside your - access scope) are NOT dropped silently — they are listed in a - block with each concrete identifier, plus a hint to re-check - those ids and retry the problematic ones. - The `` element shows the artifact's call graph: - - **outgoing_calls**: functions this artifact calls (its dependencies) - - **incoming_calls**: functions that call this artifact (its blast radius) - Each shows up to 3 related artifacts with summaries. The `count` attribute - gives the total. Relationships are omitted for non-function artifacts. - - Note: - - Hard limit: 50 identifiers per request. Recommended: ≤20 to keep - context size manageable and avoid flooding the conversation with code. - - Identifiers must come from `semantic_search`, `grep_search`, or legacy `codebase_search` results. - - Relationships shown here are a **preview** (up to 3 call relationships per direction). - To retrieve the complete list, or to explore other relationship types - (inheritance, references), use `get_artifact_relationships`. - """ - # Coerce stringified JSON arrays sent by some MCP clients (Claude Code - # deferred tools, LiveKit agents, etc.) into a proper Python list. - identifiers = coerce_stringified_list(identifiers) - - # Normalize the optional selector: treat empty/whitespace-only as "no selector" - # so we don't send a junk dataSource to the backend or echo it in the not-found hint. - if data_source is not None: - data_source = data_source.strip() or None - - if not identifiers: - raise ToolError(f"[{_TOOL_NAME}] At least one identifier is required.") - - if len(identifiers) > 50: - raise ToolError(f"[{_TOOL_NAME}] Maximum 50 identifiers per request. Please reduce the number of identifiers.") - - context: CodeAliveContext = ctx.request_context.lifespan_context - - try: - api_key = get_api_key_from_context(ctx) - headers = { - "Authorization": f"Bearer {api_key}", - "X-CodeAlive-Integration": "mcp", - "X-CodeAlive-Tool": "fetch_artifacts", - "X-CodeAlive-Client": "fastmcp", - } - - body = {"identifiers": identifiers} - if data_source: - body["dataSource"] = data_source - - await ctx.info(f"Fetching {len(identifiers)} artifact(s)") - - # Log the request - full_url = urljoin(context.base_url, "/api/search/artifacts") - request_id = log_api_request("POST", full_url, headers, body=body) - - # Make API request - response = await context.client.post( - "/api/search/artifacts", json=body, headers=headers - ) - - # Log the response - log_api_response(response, request_id) - - response.raise_for_status() - - artifacts_data = response.json() - - # Build XML output - return _build_artifacts_xml(artifacts_data, data_source=data_source, requested=identifiers) - - except (httpx.HTTPStatusError, Exception) as e: - # handle_api_error raises ToolError → MCP response gets isError: true - await handle_api_error( - ctx, e, "fetch artifacts", method=_TOOL_NAME, - recovery_hints={ - 404: ( - "(1) verify the identifiers came from a recent semantic_search, grep_search, or codebase_search call (do not invent them), " - "(2) re-run semantic_search or grep_search to get fresh identifiers — the index may have changed, " - "(3) for local repos in your working directory, use Read() on the file path instead" - ), - 409: ( - "(1) the identifier exists in more than one data source — see the candidate data sources in the Detail above; each one will resolve, " - "(2) retry fetch_artifacts with data_source set to one candidate's Name or Id; if that data source isn't the one you want, retry with the next candidate, " - "(3) do NOT invent a result — pick from the listed data sources" - ), - }, - ) - - -def _add_line_numbers(content: str, start_line: int = 1) -> str: - """Add line numbers to content for easier navigation. - - Returns content with each line prefixed by its line number, - right-aligned and separated by ' | '. - - Args: - content: The text content to number. - start_line: 1-based line number for the first line (default 1). - """ - if not content: - return content - - lines = content.split("\n") - width = len(str(start_line + len(lines) - 1)) - numbered = [f"{start_line + i:>{width}} | {line}" for i, line in enumerate(lines)] - return "\n".join(numbered) - - -def _escape_attr(value: str) -> str: - """Escape a value for safe inclusion in an XML attribute (identifiers). - - Identifiers are caller-supplied — and in the MCP setting the "caller" is an - untrusted LLM/user — and they are reflected straight back into the model's context - (especially in the block, which echoes any unmatched requested string via - the backstop). An un-escaped quote or angle bracket would let a crafted identifier break - out of the attribute and inject pseudo-XML. Mirrors the C# wrapper's - XmlToolResultFormatter.EscapeAttr. Source-code *content* is intentionally NOT escaped - (see ); this helper is for attribute values only. - """ - return ( - value.replace("&", "&") - .replace("<", "<") - .replace(">", ">") - .replace('"', """) - ) - - -def _build_artifacts_xml( - data: dict, - data_source: str | None = None, - requested: list[str] | None = None, -) -> str: - """Build XML representation of fetched artifacts. - - Backend DTO: Identifier (string), Found (bool), Content (string?), - ContentByteSize (long?), Relationships (object?). - - A requested identifier that the backend could not resolve — or that points outside - the caller's access scope — comes back with ``found: false`` (older backends omit - the flag and return ``content: null``). Such identifiers are NOT dropped silently: - they are collected into a ```` block listing each concrete - identifier, followed by ``_NOT_FOUND_HINT`` telling the agent to re-check the ids and - retry the problematic ones — otherwise the user silently loses a requested artifact. - A ``found: true`` artifact with empty content is still rendered as a normal - ```` (it was located; it just has no extractable body). - - Content is emitted raw (no HTML escaping) and wrapped between newlines so the - LLM sees the source code exactly as-is. - - When ``data_source`` was supplied and nothing was found, an additional recovery hint - suggests the identifier may live in a different data source, or the selector is wrong. - ``requested`` is the original identifier list; it backstops the diff so an id the - backend never echoed back is still surfaced as not-found. - """ - xml_parts = [""] - - has_any_relationships = False - emitted = 0 - returned_identifiers: set[str] = set() - not_found: list[str] = [] - artifacts = data.get("artifacts", []) - for artifact in artifacts: - identifier = artifact.get("identifier", "") - if identifier: - returned_identifiers.add(identifier) - - content = artifact.get("content") - # Prefer the backend's explicit `found` flag; fall back to content-is-null for - # older backends that don't emit it yet. - found = artifact.get("found") - is_missing = (found is False) if found is not None else (content is None) - if is_missing: - if identifier: - not_found.append(identifier) - continue - - emitted += 1 - content_byte_size = artifact.get("contentByteSize") - - attrs = [f'identifier="{_escape_attr(identifier)}"'] - if content_byte_size is not None: - attrs.append(f'contentByteSize="{content_byte_size}"') - - start_line = artifact.get("startLine") or 1 - numbered_content = _add_line_numbers(content or "", start_line) - - xml_parts.append(f' ') - xml_parts.append(' ') - xml_parts.append(numbered_content) - xml_parts.append(' ') - - relationships = artifact.get("relationships") - if relationships is not None: - relationships_xml = _build_relationships_xml(relationships) - if relationships_xml: - xml_parts.append(relationships_xml) - if _has_any_calls(relationships): - has_any_relationships = True - - xml_parts.append(' ') - - # Backstop: any requested identifier the backend never echoed back is also missing. - if requested: - for identifier in requested: - if identifier not in returned_identifiers and identifier not in not_found: - not_found.append(identifier) - - if has_any_relationships: - xml_parts.append( - ' The above are a preview (up to 3 calls per ' - 'direction). To retrieve the full list, or to explore other relationship ' - 'types (inheritance, references), call `get_artifact_relationships` with ' - 'an artifact identifier.' - ) - - if not_found: - xml_parts.append(f' ') - for identifier in not_found: - xml_parts.append(f' ') - xml_parts.append(' ') - xml_parts.append(f' {_NOT_FOUND_HINT.format(count=len(not_found))}') - - if emitted == 0 and data_source: - xml_parts.append( - f' No artifacts were found in data source "{_escape_attr(data_source)}". The identifier may ' - 'belong to a different data source, or the data_source value may be wrong. Try: ' - '(1) re-run fetch_artifacts with data_source set to a different candidate (use the ' - '`dataSource` name or id from your search results, or call get_data_sources), or ' - '(2) omit data_source entirely — if the identifier is ambiguous you then get a 409 ' - 'that lists the candidate data sources to choose from.' - ) - - xml_parts.append("") - return "\n".join(xml_parts) - - -def _has_any_calls(relationships: dict) -> bool: - """Return True if relationships contain at least one outgoing or incoming call.""" - for rel_type in ("outgoingCalls", "incomingCalls"): - count = relationships.get(f"{rel_type}Count") - if count and count > 0: - return True - return False - - -def _build_relationships_xml(relationships: dict) -> str | None: - """Build XML for artifact call relationships. - - Returns None if no relationship types are present. - Identifiers and summaries are emitted raw (no HTML escaping). - """ - parts = [] - - for rel_type in ("outgoingCalls", "incomingCalls"): - tag = "outgoing_calls" if rel_type == "outgoingCalls" else "incoming_calls" - count = relationships.get(f"{rel_type}Count") - calls = relationships.get(rel_type) - - if count is None: - continue - - call_elements = [] - if calls: - for call in calls: - call_id = call.get("identifier") or "" - summary = call.get("summary") - if summary is not None: - call_elements.append( - f' ' - ) - else: - call_elements.append(f' ') - - parts.append(f' <{tag} count="{count}">') - parts.extend(call_elements) - parts.append(f' ') - - if not parts: - return None - - return " \n" + "\n".join(parts) + "\n " + """Fetch full artifact content for identifiers returned by search tools.""" + normalized = normalize_optional_list(identifiers) + if not normalized: + raise ToolError("[fetch_artifacts] identifiers is required.") + if len(normalized) > 50: + raise ToolError("[fetch_artifacts] Maximum 50 identifiers per request.") + + return await call_tool_api(ctx, "fetch_artifacts", { + "identifiers": normalized, + "data_source": data_source, + }, action_label="fetch artifacts") diff --git a/src/tools/repository.py b/src/tools/repository.py new file mode 100644 index 0000000..4ac707f --- /dev/null +++ b/src/tools/repository.py @@ -0,0 +1,49 @@ +"""Tool API v3 repository context tools.""" + +from typing import Optional + +from fastmcp import Context + +from .tool_api import call_tool_api, require_text + + +async def get_repository_ontology(ctx: Context, data_source: Optional[str] = None) -> str: + """Return ontology and orientation context for exactly one repository.""" + return await call_tool_api(ctx, "get_repository_ontology", { + "data_source": data_source, + }, action_label="get repository ontology") + + +async def get_file_tree( + ctx: Context, + data_source: Optional[str] = None, + path: Optional[str] = None, + max_depth: Optional[int] = None, + max_nodes: Optional[int] = None, + output_depth: Optional[int] = None, +) -> str: + """Return a bounded file tree for exactly one repository.""" + return await call_tool_api(ctx, "get_file_tree", { + "data_source": data_source, + "path": path, + "max_depth": max_depth, + "max_nodes": max_nodes, + "output_depth": output_depth, + }, action_label="get file tree") + + +async def read_file( + ctx: Context, + path: str, + data_source: Optional[str] = None, + start_line: Optional[int] = None, + end_line: Optional[int] = None, +) -> str: + """Read a safe repository-relative file path from exactly one repository.""" + require_text(path, "read_file", "path") + return await call_tool_api(ctx, "read_file", { + "data_source": data_source, + "path": path, + "start_line": start_line, + "end_line": end_line, + }, action_label="read file") diff --git a/src/tools/search.py b/src/tools/search.py index bdbef36..aa1786e 100644 --- a/src/tools/search.py +++ b/src/tools/search.py @@ -1,434 +1,66 @@ -"""Search tool implementations.""" +"""Tool API v3 search tools.""" -import json -from typing import Any, Dict, List, Optional, Sequence, Union -from urllib.parse import urljoin +from typing import Optional, Union -import httpx from fastmcp import Context from fastmcp.exceptions import ToolError -from core import CodeAliveContext, get_api_key_from_context, log_api_request, log_api_response -from utils import ( - handle_api_error, - normalize_data_source_names, - transform_grep_response, - transform_search_response, -) - - -def _normalize_optional_list(value: Optional[Union[str, List[str]]]) -> List[str]: - """Normalize optional string-or-list inputs while preserving ordering. - - Handles stringified JSON arrays (e.g. ``'[".cs",".py"]'``) that some MCP - clients send instead of native arrays. - """ - if value is None: - return [] - if isinstance(value, str): - stripped = value.strip() - if not stripped: - return [] - if stripped.startswith("["): - try: - parsed = json.loads(stripped) - if isinstance(parsed, list): - return [str(item) for item in parsed if item] - except (json.JSONDecodeError, TypeError): - pass - return [stripped] - return [item for item in value if item] - - -def _validate_query(query: str, tool_name: str) -> None: - """Raise ToolError if query is empty.""" - if not query or not query.strip(): - raise ToolError( - f"[{tool_name}] Query cannot be empty. Please provide a search term, " - "pattern, function name, or description of the code you're looking for." - ) +from .tool_api import call_tool_api, normalize_optional_list, require_text def _validate_max_results(max_results: Optional[int], tool_name: str) -> None: - """Raise ToolError if max_results is out of range.""" if max_results is not None and not (1 <= max_results <= 500): raise ToolError(f"[{tool_name}] max_results must be between 1 and 500.") -async def _perform_search_request( - ctx: Context, - *, - tool_name: str, - endpoint: str, - params: List[tuple[str, str]], - transform_response, - action_label: str, -) -> Dict[str, Any]: - context: CodeAliveContext = ctx.request_context.lifespan_context - api_key = get_api_key_from_context(ctx) - - headers = { - "Authorization": f"Bearer {api_key}", - "X-CodeAlive-Integration": "mcp", - "X-CodeAlive-Tool": tool_name, - "X-CodeAlive-Client": "fastmcp", - } - - full_url = urljoin(context.base_url, endpoint) - request_id = log_api_request("GET", full_url, headers, params=params) - - try: - response = await context.client.get(endpoint, params=params, headers=headers) - log_api_response(response, request_id) - response.raise_for_status() - return transform_response(response.json()) - except (httpx.HTTPStatusError, Exception) as e: - # handle_api_error raises ToolError → MCP response gets isError: true - await handle_api_error( - ctx, - e, - action_label, - method=tool_name, - recovery_hints={ - 404: ( - "(1) call get_data_sources to list available data source names, " - "(2) check spelling and case of the names you passed in data_sources, " - "(3) drop data_sources entirely to fall back to the API key's default" - ), - }, - ) - - -def _build_scope_params( - *, - query: str, - data_sources: Sequence[str], - paths: Sequence[str], - extensions: Sequence[str], - max_results: Optional[int], -) -> List[tuple[str, str]]: - params: List[tuple[str, str]] = [("Query", query)] - - if max_results is not None: - params.append(("MaxResults", str(max_results))) - - for data_source in data_sources: - params.append(("Names", data_source)) - for path in paths: - params.append(("Paths", path)) - for extension in extensions: - params.append(("Extensions", extension)) - - return params - - async def semantic_search( ctx: Context, - query: str, - data_sources: Optional[Union[str, List[str]]] = None, - paths: Optional[Union[str, List[str]]] = None, - extensions: Optional[Union[str, List[str]]] = None, + question: str, + data_sources: Optional[Union[str, list[str]]] = None, + paths: Optional[Union[str, list[str]]] = None, + extensions: Optional[Union[str, list[str]]] = None, max_results: Optional[int] = None, -) -> Dict[str, Any]: - """ - Search indexed code by meaning — the default discovery tool. - - Finds code by WHAT it does, not by exact text it contains. Start here - when you can describe the behavior or concept you're looking for but - don't know (or aren't sure of) the exact names in the codebase. - - **When to use semantic_search (default):** - - Exploring concepts: "authentication middleware", "retry logic" - - Describing behavior: "database connection pooling", "JWT validation" - - Architecture questions: "request handling pipeline", "event processing" - - You don't know the exact naming convention used in the codebase - - **When to use `grep_search` instead:** - - You know an exact identifier: class, function, or variable name - - Looking for a literal string: error message, URL, config key, file path - - Finding ALL usages of a known symbol: `RepositoryDeleted`, `handlePayment` - - Searching for import paths, TODO comments, or regex patterns - - Args: - query: Natural-language description of what you're looking for. - Example: "authentication middleware", "database connection pooling", - "JWT token validation" - - data_sources: Repository or workspace names to search. - Omit to use the API key's default data source. - Call `get_data_sources` first to discover available names. - Example: ["backend", "workspace:payments-team"] - - paths: Restrict results to specific directory paths. - Example: ["src/services", "src/domain"] - - extensions: Restrict results to specific file extensions. - Example: [".cs", ".py", ".ts"] + exclude_markdown: bool = False, +) -> str: + """Search indexed code by meaning using Tool API v3. - max_results: Maximum number of results to return (1–500). - Omit for the server default. - - Returns: - {"results": [...], "hint": "..."} - - Each result contains: - - path: file path within the repository - - identifier: fully qualified artifact ID — pass this to `fetch_artifacts` - - kind: "File", "Symbol", or "Chunk" - - description: short triage summary (NOT the real source — see hint) - - startLine/endLine: line range (for symbols) - - contentByteSize: file size in bytes - - The `hint` field reminds you to load real source code via - `fetch_artifacts(identifier)` or local `Read(path)` before reasoning - about the code. - - Examples: - 1. Find authentication code: - semantic_search(query="authentication middleware", - data_sources=["backend"]) - - 2. Narrow to Python files in a specific directory: - semantic_search(query="database retry logic", - data_sources=["backend"], - paths=["src/services"], - extensions=[".py"]) + `question` must be a natural-language English sentence. Use `data_sources` + names returned by `get_data_sources`; use `id` only for automation or + disambiguation. Returns backend-rendered agentic output directly. """ tool_name = "semantic_search" - _validate_query(query, tool_name) + require_text(question, tool_name, "question") _validate_max_results(max_results, tool_name) - - data_source_names = normalize_data_source_names(data_sources) - normalized_paths = _normalize_optional_list(paths) - normalized_extensions = _normalize_optional_list(extensions) - - if data_source_names: - await ctx.info( - f"Semantic search for '{query}' across {len(data_source_names)} data source(s)" - ) - else: - await ctx.info( - f"Semantic search for '{query}' using the API key's default data source" - ) - - params = _build_scope_params( - query=query, - data_sources=data_source_names, - paths=normalized_paths, - extensions=normalized_extensions, - max_results=max_results, - ) - - return await _perform_search_request( - ctx, - tool_name=tool_name, - endpoint="/api/search/semantic", - params=params, - transform_response=transform_search_response, - action_label="semantic search", - ) + return await call_tool_api(ctx, tool_name, { + "question": question, + "data_sources": normalize_optional_list(data_sources), + "paths": normalize_optional_list(paths), + "extensions": normalize_optional_list(extensions), + "max_results": max_results, + "exclude_markdown": exclude_markdown, + }, action_label="semantic search") async def grep_search( ctx: Context, query: str, - data_sources: Optional[Union[str, List[str]]] = None, - paths: Optional[Union[str, List[str]]] = None, - extensions: Optional[Union[str, List[str]]] = None, + data_sources: Optional[Union[str, list[str]]] = None, + paths: Optional[Union[str, list[str]]] = None, + extensions: Optional[Union[str, list[str]]] = None, max_results: Optional[int] = None, + exclude_markdown: bool = False, regex: bool = False, -) -> Dict[str, Any]: - """ - Search indexed code by exact text or regex — matches file content - and, for literal queries, also file names/paths. - - Use this when you know WHAT TEXT to look for: an identifier, an error - message, a config key, or a file whose name you know (even if nothing - inside the file references that name — 1C `Form.xml`, `.mdo`, config - XML, media files, etc.). - - **When to use grep_search:** - - Specific identifiers: class/function/variable names, domain events - (e.g. `RepositoryDeleted`, `handlePayment`, `AUTH_PROVIDERS`) - - Literal strings: error messages, URLs, config keys, file paths - - File names whose content may never contain their own name - (e.g. `Form.xml`, `schema.graphql`, `appsettings.json`) - - Import paths, TODO/FIXME comments, annotations - - Regex patterns: `def test_.*async`, `Status\\.(Alive|Failed)` - - Finding ALL occurrences of a known symbol across the codebase - - **When to use `semantic_search` instead:** - - You're exploring a concept or behavior ("how does auth work?") - - You don't know the exact naming convention in the codebase - - You want code that DOES something, not code that CONTAINS a string - - Args: - query: Exact text or regex pattern to match. - Literal examples: "ConnectionString", "TODO: fix", "import numpy" - Regex examples: "def test_.*async", "Status\\.(Alive|Failed)" - - data_sources: Repository or workspace names to search. - Omit to use the API key's default data source. - Call `get_data_sources` first to discover available names. - - paths: Restrict results to specific directory paths. - Example: ["src/services"] - - extensions: Restrict results to specific file extensions. - Example: [".cs", ".py"] - - max_results: Maximum number of results to return (1–500). - - regex: If True, treat `query` as a regex pattern. Default: False (literal). - **Regex currently matches file content only** — file-name/path - matching is literal-substring only. This is a known limitation. - - Returns: - {"results": [...], "hint": "..."} - - Each result contains: - - path: file path - - identifier: pass to `fetch_artifacts` for full source - - matchCount: total matches in this file (0 for file-name-only hits) - - matches: array of line-level hits, each with: - - lineNumber, startColumn, endColumn, lineText - - matchedByName: present and `true` only when the artifact matched - by its file name/path and has no content match. In that case - `matches` is empty and `location.line` defaults to 1 as a - file-level reference — do NOT interpret `location.line` as an - actual line match. Content-match results omit this field. - - The `hint` reminds you that line previews are evidence only — load - full source via `fetch_artifacts` or local `Read()` before reasoning. - - Examples: - 1. Find exact string: - grep_search(query="ConnectionString", - data_sources=["backend"]) - - 2. Find a file by name (returns the file even if nothing inside - it references `Form.xml`): - grep_search(query="Form.xml", - data_sources=["biterp-bsl"]) - - 3. Regex search for test methods (content only): - grep_search(query="def test_.*auth", - data_sources=["backend"], - extensions=[".py"], - regex=True) - """ +) -> str: + """Search indexed code by exact literal text or regex using Tool API v3.""" tool_name = "grep_search" - _validate_query(query, tool_name) + require_text(query, tool_name, "query") _validate_max_results(max_results, tool_name) - - data_source_names = normalize_data_source_names(data_sources) - normalized_paths = _normalize_optional_list(paths) - normalized_extensions = _normalize_optional_list(extensions) - - search_kind = "regex grep" if regex else "literal grep" - if data_source_names: - await ctx.info( - f"{search_kind.capitalize()} for '{query}' across {len(data_source_names)} data source(s)" - ) - else: - await ctx.info( - f"{search_kind.capitalize()} for '{query}' using the API key's default data source" - ) - - params = _build_scope_params( - query=query, - data_sources=data_source_names, - paths=normalized_paths, - extensions=normalized_extensions, - max_results=max_results, - ) - params.append(("Regex", "true" if regex else "false")) - - return await _perform_search_request( - ctx, - tool_name=tool_name, - endpoint="/api/search/grep", - params=params, - transform_response=transform_grep_response, - action_label="grep search", - ) - - -async def codebase_search( - ctx: Context, - query: str, - data_sources: Optional[Union[str, List[str]]] = None, - mode: str = "auto", - description_detail: str = "short", -) -> Dict[str, Any]: - """ - Deprecated legacy semantic search tool. - - Prefer `semantic_search` for new integrations. This compatibility alias keeps the - previous MCP contract and forwards to the legacy backend endpoint unchanged. - """ - tool_name = "codebase_search" - _validate_query(query, tool_name) - - context: CodeAliveContext = ctx.request_context.lifespan_context - data_source_names = normalize_data_source_names(data_sources) - - normalized_mode = mode.lower() if mode else "auto" - if normalized_mode not in ["auto", "fast", "deep"]: - await ctx.warning( - f"[{tool_name}] Invalid search mode: {mode}. " - "Valid modes are 'auto', 'fast', and 'deep'. Using 'auto' instead." - ) - normalized_mode = "auto" - - detail_map = {"short": "Short", "full": "Full"} - normalized_detail = detail_map.get((description_detail or "short").lower(), "Short") - - if data_source_names: - await ctx.info( - f"Legacy codebase_search for '{query}' across {len(data_source_names)} data source(s)" - ) - else: - await ctx.info( - f"Legacy codebase_search for '{query}' using the API key's default data source" - ) - - params = [ - ("Query", query), - ("Mode", normalized_mode), - ("IncludeContent", "false"), - ("DescriptionDetail", normalized_detail), - ] - for data_source in data_source_names: - params.append(("Names", data_source)) - - api_key = get_api_key_from_context(ctx) - headers = { - "Authorization": f"Bearer {api_key}", - "X-CodeAlive-Integration": "mcp", - "X-CodeAlive-Tool": tool_name, - "X-CodeAlive-Client": "fastmcp", - } - - full_url = urljoin(context.base_url, "/api/search") - request_id = log_api_request("GET", full_url, headers, params=params) - - try: - response = await context.client.get("/api/search", params=params, headers=headers) - log_api_response(response, request_id) - response.raise_for_status() - return transform_search_response(response.json()) - except (httpx.HTTPStatusError, Exception) as e: - await handle_api_error( - ctx, - e, - "code search", - method=tool_name, - recovery_hints={ - 404: ( - "(1) call get_data_sources to list available data source names, " - "(2) check spelling and case of the names you passed in data_sources, " - "(3) drop data_sources entirely to fall back to the API key's default" - ), - }, - ) + return await call_tool_api(ctx, tool_name, { + "query": query, + "data_sources": normalize_optional_list(data_sources), + "paths": normalize_optional_list(paths), + "extensions": normalize_optional_list(extensions), + "max_results": max_results, + "exclude_markdown": exclude_markdown, + "regex": regex, + }, action_label="grep search") diff --git a/src/tools/tool_api.py b/src/tools/tool_api.py new file mode 100644 index 0000000..23d629f --- /dev/null +++ b/src/tools/tool_api.py @@ -0,0 +1,91 @@ +"""Shared v3 Tool API caller for MCP tools.""" + +import json +from typing import Any, Iterable, Optional, Union +from urllib.parse import urljoin + +import httpx +from fastmcp import Context +from fastmcp.exceptions import ToolError + +from core import CodeAliveContext, get_api_key_from_context, log_api_request, log_api_response +from utils import handle_api_error + + +def normalize_optional_list(value: Optional[Union[str, list[str]]]) -> list[str]: + if value is None: + return [] + if isinstance(value, str): + stripped = value.strip() + if not stripped: + return [] + if stripped.startswith("["): + try: + parsed = json.loads(stripped) + if isinstance(parsed, list): + return [str(item) for item in parsed if str(item).strip()] + except (json.JSONDecodeError, TypeError): + pass + return [stripped] + return [str(item) for item in value if str(item).strip()] + + +def require_text(value: str, tool_name: str, field: str) -> None: + if not value or not value.strip(): + raise ToolError(f"[{tool_name}] {field} is required.") + + +def omit_empty(payload: dict[str, Any]) -> dict[str, Any]: + return { + key: value + for key, value in payload.items() + if value is not None and value != [] and value != "" + } + + +async def call_tool_api( + ctx: Context, + tool_name: str, + payload: dict[str, Any], + *, + action_label: Optional[str] = None, +) -> str: + context: CodeAliveContext = ctx.request_context.lifespan_context + api_key = get_api_key_from_context(ctx) + body = {**omit_empty(payload), "output_format": "agentic"} + headers = { + "Authorization": f"Bearer {api_key}", + "X-CodeAlive-Integration": "mcp", + "X-CodeAlive-Tool": tool_name, + "X-CodeAlive-Client": "fastmcp-v3", + } + + endpoint = f"/api/tools/{tool_name}" + full_url = urljoin(context.base_url, endpoint) + request_id = log_api_request("POST", full_url, headers, body=body) + + try: + response = await context.client.post(endpoint, json=body, headers=headers) + log_api_response(response, request_id) + response.raise_for_status() + data = response.json() + rendered = data.get("rendered") + if isinstance(rendered, str): + return rendered + obj = data.get("obj") + return json.dumps(obj, ensure_ascii=False, indent=2) + except (httpx.HTTPStatusError, Exception) as exc: + await handle_api_error( + ctx, + exc, + action_label or tool_name, + method=tool_name, + recovery_hints={ + 404: ( + "(1) verify the tool name is a Tool API v3 tool, " + "(2) call get_data_sources to choose visible data sources, " + "(3) retry with canonical snake_case arguments" + ), + }, + ) + raise AssertionError("handle_api_error always raises") diff --git a/src/utils/errors.py b/src/utils/errors.py index 22b662a..2241c18 100644 --- a/src/utils/errors.py +++ b/src/utils/errors.py @@ -11,8 +11,8 @@ Per-tool callers can override the default ``Try:`` text via ``recovery_hints`` when a generic hint isn't actionable enough — e.g. a 404 from ``semantic_search`` -should suggest ``get_data_sources``, while a 404 from ``chat`` (or legacy -``codebase_consultant``) should suggest checking ``conversation_id``. +should suggest ``get_data_sources``, while a 404 from artifact tools should +suggest reusing identifiers returned by v3 search/fetch/read tools. """ import json @@ -102,8 +102,7 @@ class _ErrorTemplate: retry_window=None, default_hint=( "(1) inspect the field-level errors below and fix the offending parameter, " - "(2) for conversation_id / message_id, ensure the value is a 24-character hex " - "Mongo ObjectId taken from a previous response, " + "(2) verify that the request uses canonical Tool API v3 snake_case fields, " "(3) if no field errors are surfaced, re-read the tool docstring and verify " "the request shape matches" ), @@ -135,7 +134,7 @@ class _ErrorTemplate: default_hint=( "(1) call get_data_sources to see available data source names, " "(2) check spelling and case, " - "(3) verify any identifiers were returned by a recent semantic_search, grep_search, or codebase_search" + "(3) verify any identifiers were returned by a recent semantic_search, grep_search, read_file, or fetch_artifacts" ), ), 409: _ErrorTemplate( @@ -155,7 +154,7 @@ class _ErrorTemplate: retry_window="wait 1–5 minutes and retry", default_hint=( "(1) wait for indexing to complete before retrying, " - "(2) call get_data_sources(alive_only=false) to check the processing state, " + "(2) call get_data_sources(ready_only=false) to check the processing state, " "(3) try a different data source if available" ), ), @@ -240,8 +239,7 @@ async def handle_api_error( are easy to attribute. recovery_hints: Optional per-tool overrides for the ``Try: ...`` text, keyed by HTTP status code. Use this when a generic hint isn't - enough — e.g. ``chat`` overrides 404 with - ``"check the conversation_id"``. + enough for a specific tool. Raises: ToolError: Always raised — sets ``isError: true`` in the MCP response From f1c422a772cd5ecfbc0b43b14831aab235e6aef7 Mon Sep 17 00:00:00 2001 From: Rodion Mostovoi <36400912+rodion-m@users.noreply.github.com> Date: Wed, 1 Jul 2026 16:31:55 +0500 Subject: [PATCH 2/3] Remove legacy MCP alias references --- CLAUDE.md | 16 +++++++++------- integration_test.py | 17 ----------------- smoke_test.py | 37 +++++-------------------------------- 3 files changed, 14 insertions(+), 56 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 0c1cc7b..f4b561f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -95,7 +95,7 @@ This is a Model Context Protocol (MCP) server that provides AI clients with acce ### Core Components - **`codealive_mcp_server.py`**: Main entry point — bootstraps logging, tracing, registers tools and middleware -- **Eight tools**: `get_data_sources`, `semantic_search`, `grep_search`, `fetch_artifacts`, `get_artifact_relationships`, `chat`, `codebase_search`, `codebase_consultant` +- **Eleven tools**: `get_data_sources`, `semantic_search`, `grep_search`, `get_repository_ontology`, `get_file_tree`, `read_file`, `fetch_artifacts`, `get_artifact_relationships`, `get_artifact_query_schema`, `query_artifact_metadata`, `chat` - **`core/client.py`**: `CodeAliveContext` dataclass + `codealive_lifespan` (httpx.AsyncClient lifecycle, `_server_ready` flag) - **`core/logging.py`**: loguru structured JSON logging + PII masking + OTel context injection - **`core/observability.py`**: OpenTelemetry TracerProvider setup with OTLP export @@ -106,7 +106,7 @@ This is a Model Context Protocol (MCP) server that provides AI clients with acce 1. **FastMCP Framework**: Uses FastMCP 3.x with lifespan context, middleware hooks, and built-in `Client` for testing 2. **HTTP Auth via `get_http_headers`**: FastMCP 3.x strips the `authorization` header by default (to prevent accidental credential forwarding to downstream services). Our `get_api_key_from_context()` in `core/client.py` must use `get_http_headers(include={"authorization"})` to read Bearer tokens from HTTP/streamable-http clients. **Do not remove the `include=` parameter** — without it, all HTTP-transport clients (LibreChat, n8n, etc.) will fail with a misleading STDIO-mode error. 3. **HTTP Client Management**: Single persistent `httpx.AsyncClient` with connection pooling, created in lifespan -3. **Streaming Support**: `chat` and the deprecated `codebase_consultant` alias use SSE streaming (`response.aiter_lines()`) for chat completions +3. **Tool API v3 Backend Contract**: every MCP tool delegates to `POST /api/tools/{name}` and requests `output_format=agentic` 4. **Environment Configuration**: Supports both .env files and command-line arguments with precedence 5. **Error Handling**: Centralized in `utils/errors.py` — all tools use `handle_api_error()` with `method=` prefix 6. **N8N Middleware**: Strips extra parameters (sessionId, action, chatInput, toolCallId) from n8n tool calls before validation @@ -158,7 +158,7 @@ This project uses **loguru** for structured JSON logging. All logs go to **stder 2. **All logs go to stderr.** The stdio MCP transport uses stdout for protocol messages. Any stray `print()` or stdout write will corrupt the MCP protocol and break the client. If you add a new log sink, it must target `sys.stderr`. -3. **Never call `response.text` without a debug guard.** `log_api_response()` is protected by `_is_debug_enabled()` because reading `response.text` consumes the response body. The `chat` tool and deprecated `codebase_consultant` alias stream SSE via `response.aiter_lines()` — calling `.text` first would silently consume the stream and produce empty results. If you add new response logging, always check `_is_debug_enabled()` first: +3. **Never call `response.text` without a debug guard.** `log_api_response()` is protected by `_is_debug_enabled()` because reading `response.text` consumes the response body. If you add new response logging, always check `_is_debug_enabled()` first: ```python if not _is_debug_enabled(): return # Do NOT touch response body at INFO level @@ -264,8 +264,10 @@ Tools that return **structured metadata** (identifiers, match counts, line numbers, relationship groups, data source listings) return a `dict` (or list of dicts). FastMCP serializes it automatically via `pydantic_core.to_json`, which preserves Unicode — no manual `json.dumps()` needed. Examples: -`semantic_search`, `grep_search`, `codebase_search`, `get_data_sources`, -`get_artifact_relationships`. +`semantic_search`, `grep_search`, `get_data_sources`, +`get_repository_ontology`, `get_file_tree`, `read_file`, +`get_artifact_relationships`, `get_artifact_query_schema`, and +`query_artifact_metadata`. **Never call `json.dumps(...)` from a tool's return path.** Python's `json.dumps` defaults to `ensure_ascii=True` and escapes Cyrillic/CJK/etc. to `\uXXXX`. @@ -289,7 +291,7 @@ description alone — descriptions are not always re-read mid-conversation, but the response is always in front of the model when it decides what to do next. Examples in this repo: -- `codebase_search` returns a `hint` field telling the agent that `description` +- `semantic_search` and `grep_search` return a `hint` field telling the agent that `description` is a triage pointer only and that real understanding must come from `fetch_artifacts(identifier)` or a local `Read(path)`. Implementation: `_SEARCH_HINT` in `src/utils/response_transformer.py`. @@ -352,7 +354,7 @@ Key points: - Custom lifespan yields a real `CodeAliveContext` with a mock-backed httpx client - `monkeypatch.setenv("CODEALIVE_API_KEY", ...)` for `get_api_key_from_context` fallback - Use `raise_on_error=False` when testing error paths, then assert on `result.content[0].text` -- For SSE streaming (`chat` / `codebase_consultant`), return `httpx.Response(200, text=sse_body)` — `aiter_lines()` works on buffered responses +- For chat-style buffered responses, return `httpx.Response(200, json=payload)` and assert against the Tool API v3 envelope content ### Unit Test Patterns diff --git a/integration_test.py b/integration_test.py index b2a5962..a0e1034 100644 --- a/integration_test.py +++ b/integration_test.py @@ -556,23 +556,6 @@ async def test_agent_workflow(s: ClientSession, target: str) -> None: len(text) > 100 and not r.isError, f"len={len(text)}") - # 5. deprecated aliases - r = await s.call_tool("codebase_consultant", { - "question": "What testing patterns are used?", - "data_sources": [target], - }) - record("workflow: codebase_consultant (deprecated)", - len(r.content[0].text) > 50 and not r.isError, - f"len={len(r.content[0].text)}") - - r = await s.call_tool("codebase_search", { - "query": "error handling", - "data_sources": [target], - }) - record("workflow: codebase_search (deprecated)", - not r.isError, - f"len={len(r.content[0].text)}") - # ── Main ───────────────────────────────────────────────────────────────────── diff --git a/smoke_test.py b/smoke_test.py index ebcb133..1b170a1 100644 --- a/smoke_test.py +++ b/smoke_test.py @@ -135,12 +135,15 @@ async def test_list_tools(self) -> bool: expected_tools = { "chat", - "codebase_consultant", - "codebase_search", "fetch_artifacts", "get_artifact_relationships", + "get_artifact_query_schema", "get_data_sources", + "get_file_tree", + "get_repository_ontology", "grep_search", + "query_artifact_metadata", + "read_file", "semantic_search", } actual_tools = {tool.name for tool in tools} @@ -244,35 +247,6 @@ async def test_chat(self) -> bool: self.print_error(f"Tool execution failed: {str(e)}") return False - async def test_codebase_consultant(self) -> bool: - """Test the codebase_consultant tool (deprecated alias).""" - self.print_test("codebase_consultant Tool (deprecated)") - try: - result = await self.session.call_tool("codebase_consultant", { - "question": "test question", - "data_sources": ["test-repo"] - }) - - if result.isError: - # Error is expected if no valid API key - error_str = str(result.content) - if "API key" in error_str or "data source" in error_str or "authorization" in error_str.lower(): - self.print_success("Tool responds correctly (API key/data source required)") - self.print_info("This is expected in smoke test without valid API key") - return True - else: - self.print_error(f"Unexpected error: {result.content}") - return False - - # If we have a valid API key and data source, check response - self.print_success("Tool executed successfully") - self.print_info(f"Response: {str(result.content)[:100]}...") - return True - - except Exception as e: - self.print_error(f"Tool execution failed: {str(e)}") - return False - async def test_parameter_validation(self) -> bool: """Test that tools validate parameters correctly.""" self.print_test("Parameter Validation") @@ -316,7 +290,6 @@ async def run_all_tests(self): await self.test_get_data_sources() await self.test_semantic_search() await self.test_chat() - await self.test_codebase_consultant() await self.test_parameter_validation() except Exception as e: From 13c2521f56dfdedf1abf372fb200d1ce63adb1c4 Mon Sep 17 00:00:00 2001 From: Rodion Mostovoi <36400912+rodion-m@users.noreply.github.com> Date: Wed, 1 Jul 2026 16:55:40 +0500 Subject: [PATCH 3/3] Use snake case relationship profiles --- src/tests/test_tool_api_v3.py | 4 ++-- src/tools/artifact_relationships.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/test_tool_api_v3.py b/src/tests/test_tool_api_v3.py index d3674a1..676f417 100644 --- a/src/tests/test_tool_api_v3.py +++ b/src/tests/test_tool_api_v3.py @@ -99,14 +99,14 @@ def _context_with_response(rendered: str = "ok"): lambda ctx: get_artifact_relationships( ctx, identifier="repo::src/Foo.cs::Foo", - profile="AllRelevant", + profile="all_relevant", max_count_per_type=25, data_source="backend", ), "/api/tools/get_artifact_relationships", { "identifier": "repo::src/Foo.cs::Foo", - "profile": "AllRelevant", + "profile": "all_relevant", "max_count_per_type": 25, "data_source": "backend", }, diff --git a/src/tools/artifact_relationships.py b/src/tools/artifact_relationships.py index ab764dd..2f5e14f 100644 --- a/src/tools/artifact_relationships.py +++ b/src/tools/artifact_relationships.py @@ -11,7 +11,7 @@ async def get_artifact_relationships( ctx: Context, identifier: str, - profile: Literal["CallsOnly", "InheritanceOnly", "AllRelevant", "ReferencesOnly"] = "CallsOnly", + profile: Literal["calls_only", "inheritance_only", "all_relevant", "references_only"] = "calls_only", max_count_per_type: int = 50, data_source: Optional[str] = None, ) -> str: