diff --git a/README.md b/README.md index 42babc1..023ce6a 100644 --- a/README.md +++ b/README.md @@ -133,13 +133,13 @@ Example usage through MCP: ### Search Functionality -The server provides a flexible search tool that can search across one or all indices: +The server provides a flexible search tool that can search across one or all indices with support for hybrid search (combining keyword and semantic search): - `search`: Search through Meilisearch indices with optional parameters Example usage through MCP: ```json -// Search in a specific index +// Traditional keyword search in a specific index { "name": "search", "arguments": { @@ -149,6 +149,31 @@ Example usage through MCP: } } +// Hybrid search combining keyword and semantic search +{ + "name": "search", + "arguments": { + "query": "artificial intelligence", + "indexUid": "documents", + "hybrid": { + "semanticRatio": 0.7, + "embedder": "default" + }, + "limit": 20 + } +} + +// Semantic search with custom vector +{ + "name": "search", + "arguments": { + "query": "machine learning", + "indexUid": "articles", + "vector": [0.1, 0.2, 0.3, 0.4, 0.5], + "retrieveVectors": true + } +} + // Search across all indices { "name": "search", @@ -167,6 +192,13 @@ Available search parameters: - `offset`: Number of results to skip (optional, default: 0) - `filter`: Filter expression (optional) - `sort`: Sorting rules (optional) +- `hybrid`: Hybrid search configuration (optional) + - `semanticRatio`: Balance between keyword (0.0) and semantic (1.0) search (optional, default: 0.5) + - `embedder`: Name of the configured embedder to use (required when using hybrid) +- `vector`: Custom vector for semantic search (optional) +- `retrieveVectors`: Include vector data in search results (optional) + +**Note**: To use hybrid search features, you need to have an embedder configured in your Meilisearch index settings. Refer to the [Meilisearch documentation on vector search](https://www.meilisearch.com/docs/learn/vector_search/vector_search_basics) for configuration details. ### Running the Server @@ -210,7 +242,7 @@ npx @modelcontextprotocol/inspector python -m src.meilisearch_mcp - `add-documents`: Add or update documents in an index ### Search -- `search`: Flexible search across single or multiple indices with filtering and sorting options +- `search`: Flexible search across single or multiple indices with support for hybrid search (keyword + semantic), custom vectors, and filtering/sorting options ### Settings Management - `get-settings`: View current settings for an index diff --git a/pyproject.toml b/pyproject.toml index 5f2aac6..be2d53b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ version = "0.5.0" description = "MCP server for Meilisearch" requires-python = ">=3.10" dependencies = [ - "meilisearch>=0.33.0", + "meilisearch>=0.34.0", "mcp>=0.1.0", "httpx>=0.24.0", "pydantic>=2.0.0" diff --git a/src/meilisearch_mcp/client.py b/src/meilisearch_mcp/client.py index 7f175a4..bcd582b 100644 --- a/src/meilisearch_mcp/client.py +++ b/src/meilisearch_mcp/client.py @@ -52,6 +52,9 @@ def search( offset: Optional[int] = 0, filter: Optional[str] = None, sort: Optional[List[str]] = None, + hybrid: Optional[Dict[str, Any]] = None, + vector: Optional[List[float]] = None, + retrieve_vectors: Optional[bool] = None, **kwargs, ) -> Dict[str, Any]: """ @@ -70,6 +73,12 @@ def search( search_params["filter"] = filter if sort is not None: search_params["sort"] = sort + if hybrid is not None: + search_params["hybrid"] = hybrid + if vector is not None: + search_params["vector"] = vector + if retrieve_vectors is not None: + search_params["retrieveVectors"] = retrieve_vectors # Add any additional parameters search_params.update({k: v for k, v in kwargs.items() if v is not None}) diff --git a/src/meilisearch_mcp/server.py b/src/meilisearch_mcp/server.py index db6cca6..d835cef 100644 --- a/src/meilisearch_mcp/server.py +++ b/src/meilisearch_mcp/server.py @@ -211,7 +211,7 @@ async def handle_list_tools() -> list[types.Tool]: ), types.Tool( name="search", - description="Search through Meilisearch indices. If indexUid is not provided, it will search across all indices.", + description="Search through Meilisearch indices with support for hybrid search (combining keyword and semantic search). If indexUid is not provided, it will search across all indices.", inputSchema={ "type": "object", "properties": { @@ -224,6 +224,32 @@ async def handle_list_tools() -> list[types.Tool]: "type": "array", "items": {"type": "string"}, }, + "hybrid": { + "type": "object", + "properties": { + "semanticRatio": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Balance between keyword (0.0) and semantic (1.0) search", + }, + "embedder": { + "type": "string", + "description": "Name of the configured embedder to use", + }, + }, + "required": ["embedder"], + "additionalProperties": False, + }, + "vector": { + "type": "array", + "items": {"type": "number"}, + "description": "Custom vector for semantic search", + }, + "retrieveVectors": { + "type": "boolean", + "description": "Include vector data in search results", + }, }, "required": ["query"], "additionalProperties": False, @@ -498,6 +524,9 @@ async def handle_call_tool( offset=arguments.get("offset"), filter=arguments.get("filter"), sort=arguments.get("sort"), + hybrid=arguments.get("hybrid"), + vector=arguments.get("vector"), + retrieve_vectors=arguments.get("retrieveVectors"), ) # Format the results for better readability diff --git a/tests/test_mcp_client.py b/tests/test_mcp_client.py index 9e5e88a..2fdb418 100644 --- a/tests/test_mcp_client.py +++ b/tests/test_mcp_client.py @@ -612,6 +612,161 @@ async def test_delete_index_integration_workflow(self, mcp_server): assert "Error:" in search_after_text +class TestHybridSearch: + """Test hybrid search functionality with semantic search support""" + + async def test_search_tool_has_hybrid_parameters(self, mcp_server): + """Test that search tool schema includes hybrid search parameters""" + tools = await simulate_list_tools(mcp_server) + + # Find the search tool + search_tool = next((t for t in tools if t.name == "search"), None) + assert search_tool is not None, "Search tool not found" + + # Check that hybrid parameters are in the schema + properties = search_tool.inputSchema["properties"] + + # Check hybrid object parameter + assert "hybrid" in properties + hybrid_schema = properties["hybrid"] + assert hybrid_schema["type"] == "object" + assert "semanticRatio" in hybrid_schema["properties"] + assert "embedder" in hybrid_schema["properties"] + assert hybrid_schema["properties"]["semanticRatio"]["type"] == "number" + assert hybrid_schema["properties"]["semanticRatio"]["minimum"] == 0.0 + assert hybrid_schema["properties"]["semanticRatio"]["maximum"] == 1.0 + assert hybrid_schema["properties"]["embedder"]["type"] == "string" + assert hybrid_schema["required"] == ["embedder"] + + # Check vector parameter + assert "vector" in properties + vector_schema = properties["vector"] + assert vector_schema["type"] == "array" + assert vector_schema["items"]["type"] == "number" + + # Check retrieveVectors parameter + assert "retrieveVectors" in properties + assert properties["retrieveVectors"]["type"] == "boolean" + + @pytest.mark.skip(reason="Requires Meilisearch instance with embedders configured") + async def test_search_with_hybrid_parameters(self, mcp_server): + """Test that search accepts and processes hybrid search parameters""" + # Create test index + index_name = generate_unique_index_name("hybrid_search") + await simulate_mcp_call( + mcp_server, "create-index", {"uid": index_name, "primaryKey": "id"} + ) + + # Add test documents + documents = [ + { + "id": 1, + "title": "Python Programming", + "description": "Learn Python basics", + }, + {"id": 2, "title": "JavaScript Guide", "description": "Modern JS features"}, + {"id": 3, "title": "Machine Learning", "description": "AI and ML concepts"}, + ] + await simulate_mcp_call( + mcp_server, + "add-documents", + {"indexUid": index_name, "documents": documents}, + ) + await wait_for_indexing() + + # Note: This test simulates the API call structure but won't actually + # perform semantic search without a configured embedder in Meilisearch + + # Test search with hybrid parameters + search_params = { + "query": "programming", + "indexUid": index_name, + "hybrid": {"semanticRatio": 0.7, "embedder": "default"}, + "limit": 5, + } + + # The search should accept these parameters without error + response = await simulate_mcp_call(mcp_server, "search", search_params) + response_text = assert_text_content_response(response, "Search results") + + # Even if embedder is not configured, the API should handle the request + assert "Search results for 'programming'" in response_text + + # Cleanup + await simulate_mcp_call(mcp_server, "delete-index", {"uid": index_name}) + + @pytest.mark.skip(reason="Requires Meilisearch instance with embedders configured") + async def test_search_with_vector_parameter(self, mcp_server): + """Test that search accepts vector parameter""" + # Create test index + index_name = generate_unique_index_name("vector_search") + await simulate_mcp_call( + mcp_server, "create-index", {"uid": index_name, "primaryKey": "id"} + ) + + # Add test documents + documents = [{"id": 1, "content": "Test document"}] + await simulate_mcp_call( + mcp_server, + "add-documents", + {"indexUid": index_name, "documents": documents}, + ) + await wait_for_indexing() + + # Test search with vector parameter + search_params = { + "query": "test", + "indexUid": index_name, + "vector": [0.1, 0.2, 0.3, 0.4, 0.5], + "retrieveVectors": True, + } + + # The search should accept these parameters without error + response = await simulate_mcp_call(mcp_server, "search", search_params) + response_text = assert_text_content_response(response, "Search results") + assert "Search results for 'test'" in response_text + + # Cleanup + await simulate_mcp_call(mcp_server, "delete-index", {"uid": index_name}) + + @pytest.mark.skip(reason="Requires Meilisearch instance with embedders configured") + async def test_search_semantic_only(self, mcp_server): + """Test semantic-only search with semanticRatio=1.0""" + # Create test index + index_name = generate_unique_index_name("semantic_only") + await simulate_mcp_call( + mcp_server, "create-index", {"uid": index_name, "primaryKey": "id"} + ) + + # Add test documents + documents = [ + {"id": 1, "content": "Artificial intelligence and machine learning"} + ] + await simulate_mcp_call( + mcp_server, + "add-documents", + {"indexUid": index_name, "documents": documents}, + ) + await wait_for_indexing() + + # Test semantic-only search + search_params = { + "query": "AI ML", + "indexUid": index_name, + "hybrid": { + "semanticRatio": 1.0, # Pure semantic search + "embedder": "default", + }, + } + + response = await simulate_mcp_call(mcp_server, "search", search_params) + response_text = assert_text_content_response(response, "Search results") + assert "Search results for 'AI ML'" in response_text + + # Cleanup + await simulate_mcp_call(mcp_server, "delete-index", {"uid": index_name}) + + class TestIssue27OpenAISchemaCompatibility: """Test for issue #27 - Fix JSON schemas for OpenAI Agent SDK compatibility"""