|
511 | 511 | " # Format results\n",
|
512 | 512 | " formatted_results = []\n",
|
513 | 513 | " for hit in results[0]:\n",
|
| 514 | + " # For COSINE metric, Milvus returns similarity scores (higher = more similar)\n", |
| 515 | + " # No need to convert - use the score directly\n", |
| 516 | + " similarity_score = hit[\"distance\"] # This is actually similarity for COSINE\n", |
| 517 | + " \n", |
514 | 518 | " formatted_results.append({\n",
|
515 | 519 | " \"text\": hit[\"text\"],\n",
|
516 | 520 | " \"metadata\": json.loads(hit[\"metadata\"]),\n",
|
517 |
| - " \"score\": 1.0 - hit[\"distance\"], # Convert distance to similarity score for COSINE\n", |
| 521 | + " \"score\": similarity_score, # Use direct similarity score\n", |
518 | 522 | " \"id\": hit[\"id\"]\n",
|
519 | 523 | " })\n",
|
520 | 524 | " \n",
|
|
851 | 855 | "metadata": {},
|
852 | 856 | "outputs": [],
|
853 | 857 | "source": [
|
854 |
| - "# Test Sample Queries\n", |
855 |
| - "queries = [\n", |
856 |
| - " \"What is artificial intelligence?\",\n", |
857 |
| - " \"How does Milvus work as a vector database?\",\n", |
858 |
| - " \"Explain the RAG pipeline steps\",\n", |
859 |
| - " \"What are the differences between machine learning and deep learning?\"\n", |
860 |
| - "]\n", |
| 858 | + "# Debug Sample Queries - Test one specific query with detailed logging\n", |
| 859 | + "print(\"🔍 Debug: Testing specific query with detailed logging\\n\")\n", |
861 | 860 | "\n",
|
862 |
| - "print(\"Testing RAG Pipeline with sample queries...\\n\")\n", |
| 861 | + "# Test the Milvus query that should definitely work\n", |
| 862 | + "test_query = \"How does Milvus work as a vector database?\"\n", |
| 863 | + "print(f\"Query: {test_query}\")\n", |
| 864 | + "print(\"-\" * 50)\n", |
863 | 865 | "\n",
|
864 |
| - "for i, query in enumerate(queries, 1):\n", |
865 |
| - " print(f\"Query {i}: {query}\")\n", |
866 |
| - " print(\"-\" * 50)\n", |
867 |
| - " \n", |
868 |
| - " result = rag.query(query, top_k=3)\n", |
869 |
| - " \n", |
870 |
| - " print(f\"Answer: {result['answer']}\")\n", |
871 |
| - " print(f\"\\nRetrieved {result['num_retrieved']} documents:\")\n", |
872 |
| - " \n", |
873 |
| - " for j, doc in enumerate(result['retrieved_documents'], 1):\n", |
874 |
| - " print(f\" {j}. Score: {doc['score']:.4f}\")\n", |
875 |
| - " print(f\" Text: {doc['text'][:100]}...\")\n", |
876 |
| - " print(f\" Source: {doc['metadata'].get('source', 'Unknown')}\")\n", |
| 866 | + "# Get query embedding\n", |
| 867 | + "query_embedding = rag.embedding_generator.embed_text(test_query)\n", |
| 868 | + "print(f\"✅ Generated query embedding: shape {query_embedding.shape}\")\n", |
| 869 | + "\n", |
| 870 | + "# Test direct search on vector store\n", |
| 871 | + "print(\"🔍 Testing direct vector store search...\")\n", |
| 872 | + "try:\n", |
| 873 | + " search_results = rag.vector_store.search(query_embedding, top_k=5)\n", |
| 874 | + " print(f\"✅ Direct search returned {len(search_results)} results\")\n", |
877 | 875 | " \n",
|
878 |
| - " print(\"\\n\" + \"=\" * 80 + \"\\n\")" |
| 876 | + " if search_results:\n", |
| 877 | + " for i, result in enumerate(search_results):\n", |
| 878 | + " print(f\" Result {i+1}:\")\n", |
| 879 | + " print(f\" Score: {result.get('score', 'N/A')}\")\n", |
| 880 | + " print(f\" Text preview: {str(result.get('text', 'N/A'))[:100]}...\")\n", |
| 881 | + " print(f\" ID: {result.get('id', 'N/A')}\")\n", |
| 882 | + " else:\n", |
| 883 | + " print(\" ❌ No results from direct search\")\n", |
| 884 | + " \n", |
| 885 | + "except Exception as e:\n", |
| 886 | + " print(f\"❌ Direct search failed: {e}\")\n", |
| 887 | + "\n", |
| 888 | + "print(\"\\n\" + \"=\" * 50)\n", |
| 889 | + "\n", |
| 890 | + "# Now test full RAG pipeline\n", |
| 891 | + "print(\"🔍 Testing full RAG pipeline...\")\n", |
| 892 | + "result = rag.query(test_query, top_k=5)\n", |
| 893 | + "print(f\"Full pipeline returned {result['num_retrieved']} documents\")\n", |
| 894 | + "print(f\"Answer: {result['answer'][:200]}...\")\n", |
| 895 | + "\n", |
| 896 | + "print(\"\\n🔍 Let's also test a simple keyword match:\")\n", |
| 897 | + "keyword_result = rag.query(\"Milvus vector database\", top_k=5) \n", |
| 898 | + "print(f\"Keyword query returned {keyword_result['num_retrieved']} documents\")" |
879 | 899 | ]
|
880 | 900 | },
|
881 | 901 | {
|
|
0 commit comments