-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_models.py
More file actions
216 lines (181 loc) · 8.29 KB
/
test_models.py
File metadata and controls
216 lines (181 loc) · 8.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import os
import sys
import time
from openai import OpenAI
# ----------------------------------------------------------------------
# GitHub Model API test (unchanged)
# ----------------------------------------------------------------------
def test_github_model_api():
    """Test the GitHub-hosted OpenAI model API.

    Sends a single chat completion request to the GitHub Models inference
    endpoint and reports success/failure on stdout.

    Returns:
        bool: True when the API returned at least one choice, False on any
        failure (missing dependency, missing token, or API error).
    """
    print("=== Testing GitHub Model API ===")
    try:
        import requests
        print("✓ Requests package imported successfully")
    except ImportError as e:
        print(f"✗ Failed to import requests: {e}")
        return False
    # Read the token from the environment instead of hard-coding it in
    # source: committing credentials is a security risk, and a hard-coded
    # placeholder made the `if not token` guard below dead code.
    token = os.environ.get("GITHUB_TOKEN", "")
    endpoint = "https://models.github.ai/inference"
    model = "openai/gpt-4.1-nano"
    if not token:
        print("✗ No GitHub token provided")
        return False
    print(f"Token format check: {token[:10]}...{token[-10:]}")
    print(f"Endpoint: {endpoint}")
    print(f"Model: {model}")
    try:
        client = OpenAI(base_url=endpoint, api_key=token)
        print(f"Testing GitHub model: {model}")
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Most valuable person in the world is?"}
            ],
            max_tokens=50,
            temperature=0.7,
            top_p=0.7,
        )
        if response.choices:
            content = response.choices[0].message.content
            print("✓ GitHub Model API test successful!")
            print(f"Response: {content}")
            return True
        else:
            print("✗ No choices in response")
            return False
    except Exception as e:
        print(f"✗ Unexpected error: {e}")
        print(f"Error type: {type(e).__name__}")
        import traceback
        print(f"Full traceback: {traceback.format_exc()}")
        return False
# ----------------------------------------------------------------------
# HuggingFace Embeddings test (unchanged)
# ----------------------------------------------------------------------
def test_huggingface_embeddings():
    """Smoke-test the HuggingFaceEmbeddings model.

    Loads the Bengali sentence-similarity SBERT model and embeds a few
    sample queries, printing the embedding dimension for each.

    Returns:
        bool: True when every sample embeds successfully, False otherwise.
    """
    print("\n=== Testing HuggingFaceEmbeddings ===")
    try:
        from langchain.embeddings import HuggingFaceEmbeddings
        print("✓ HuggingFaceEmbeddings imported successfully")
    except ImportError as e:
        print(f"✗ Failed to import HuggingFaceEmbeddings: {e}")
        return False
    try:
        model_name = "l3cube-pune/bengali-sentence-similarity-sbert"
        print(f"Loading model: {model_name}")
        embedder = HuggingFaceEmbeddings(model_name=model_name)
        print("✓ Model loaded successfully")
        samples = [
            "প্রাথমিক শিক্ষার গুরুত্ব কি?",
            "স্বাস্থ্য ভালো রাখার উপায়",
            "প্রযুক্তির ব্যবহার",
        ]
        print("Testing embedding generation...")
        for idx, sample in enumerate(samples, 1):
            vector = embedder.embed_query(sample)
            print(f"✓ Text {idx} embedded (dim: {len(vector)})")
        return True
    except Exception as e:
        print(f"✗ HuggingFaceEmbeddings test failed: {e}")
        return False
# ----------------------------------------------------------------------
# Chroma vector‑store test (replaces FAISS test)
# ----------------------------------------------------------------------
def test_chroma():
    """Test that the Chroma vector store can be instantiated and queried.

    Returns:
        bool: True when a persistent client and collection are created,
        False when chromadb is missing or instantiation fails.
    """
    print("\n=== Testing Chroma Vector Store ===")
    # Prefer the modern (v0.4+) import path; fall back to the legacy API
    # module for older chromadb releases.
    try:
        from chromadb import PersistentClient
        print("✓ chromadb imported via `from chromadb import PersistentClient`")
    except ImportError:
        try:
            from chromadb.api import PersistentClient
            print("✓ chromadb imported via `from chromadb.api import PersistentClient`")
        except ImportError as e:
            print(f"✗ Failed to import PersistentClient from chromadb: {e}")
            return False
    try:
        persist_dir = "chroma_db"  # same persistence directory rag_core uses
        store = PersistentClient(path=persist_dir)
        faq_collection = store.get_or_create_collection(name="bangla_faqs")
        print(f"✓ Chroma collection '{faq_collection.name}' ready (persist dir: {persist_dir})")
        return True
    except Exception as e:
        print(f"✗ Chroma test failed: {e}")
        return False
# ----------------------------------------------------------------------
# RAG integration test (updated for Chroma)
# ----------------------------------------------------------------------
def test_rag_integration():
    """Test the full RAG pipeline using the Chroma backend.

    Imports the core RAG module, initialises its vector store, and runs a
    small retrieval query against it.

    Returns:
        bool: True when initialisation succeeds and documents are
        retrieved, False on any failure.
    """
    print("\n=== Testing RAG Integration (Chroma) ===")
    try:
        # The old code's comment said the core module is named rag_core yet
        # imported chroma_core. Try rag_core first (matching every log
        # message here), falling back to chroma_core for compatibility.
        # Importing the module object (rather than `from ... import
        # RAG_ENABLED`) also fixes a stale-binding bug: a flag flipped
        # inside initialize_vectorstore() is now actually observed below.
        import importlib
        try:
            core = importlib.import_module("rag_core")
        except ImportError:
            core = importlib.import_module("chroma_core")
        print("✓ rag_core imported successfully")
        print(f"✓ FAQ_DATA contains {len(core.FAQ_DATA)} entries")
    except Exception as e:
        print(f"✗ Failed to import rag_core: {e}")
        return False
    # Initialise the Chroma store.
    print("Initialising Chroma vector store...")
    init_ok = core.initialize_vectorstore()
    # Read RAG_ENABLED *after* initialisation so a value set during
    # initialize_vectorstore() is seen.
    if not init_ok or not core.RAG_ENABLED:
        print("✗ RAG system initialisation failed")
        return False
    print("✓ RAG system initialised")
    # Perform a simple retrieval test.
    test_query = "শিক্ষা"
    print(f"Retrieving documents for query: '{test_query}' (topic='সব')")
    docs = core.retrieve_documents(query=test_query, topic="সব", k=2)
    if not docs:
        print("✗ No documents retrieved")
        return False
    print(f"✓ Retrieved {len(docs)} documents")
    for i, doc in enumerate(docs, 1):
        snippet = doc.page_content[:120].replace("\n", " ")
        print(f" Doc {i}: {snippet}...")
    return True
# ----------------------------------------------------------------------
# Main driver
# ----------------------------------------------------------------------
def main():
    """Run every smoke test and print a pass/fail summary with hints."""
    print("GitHub Model Testing Script")
    banner = "=" * 50
    print(banner)
    # Run each test in order; each prints its own progress messages.
    api_ok = test_github_model_api()
    embed_ok = test_huggingface_embeddings()
    db_ok = test_chroma()
    pipeline_ok = test_rag_integration()
    # Summary table.
    print("\n" + banner)
    print("TEST RESULTS SUMMARY")
    print(banner)
    print(f"GitHub Model API: {'✓ PASS' if api_ok else '✗ FAIL'}")
    print(f"HuggingFace: {'✓ PASS' if embed_ok else '✗ FAIL'}")
    print(f"Chroma DB: {'✓ PASS' if db_ok else '✗ FAIL'}")
    print(f"RAG Integration: {'✓ PASS' if pipeline_ok else '✗ FAIL'}")
    if all((api_ok, embed_ok, db_ok, pipeline_ok)):
        print("\n🎉 All tests passed! Your chatbot is ready.")
        return
    # At least one test failed — print a targeted hint per failure.
    print("\n❌ Some tests failed. Review the messages above.")
    if not api_ok:
        print("- GitHub Model: verify token & endpoint.")
    if not embed_ok:
        print("- HuggingFace: check internet & disk space.")
    if not db_ok:
        print("- Chroma: ensure `chromadb` is installed (`pip install chromadb`).")
    if not pipeline_ok:
        print("- RAG: confirm `initialize_vectorstore()` succeeds and FAQ data is correct.")


if __name__ == "__main__":
    main()