kevoreilly
diff --git a/‎.github/ISSUE_TEMPLATE/bug_report.md‎
Lines changed: 1 addition & 0 deletions b/‎.github/ISSUE_TEMPLATE/bug_report.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/auto_answer.yml‎
Lines changed: 35 additions & 0 deletions b/‎.github/workflows/auto_answer.yml‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎KnowledgeBaseBot/all_texts.json‎
Lines changed: 3014 additions & 0 deletions b/‎KnowledgeBaseBot/all_texts.json‎
Lines changed: 3014 additions & 0 deletions
diff --git a/‎KnowledgeBaseBot/auto_answer.yml‎
Lines changed: 35 additions & 0 deletions b/‎KnowledgeBaseBot/auto_answer.yml‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎KnowledgeBaseBot/auto_answer_bot.py‎
Lines changed: 118 additions & 0 deletions b/‎KnowledgeBaseBot/auto_answer_bot.py‎
Lines changed: 118 additions & 0 deletions
diff --git a/‎KnowledgeBaseBot/build_knowledge_base.py‎
Lines changed: 130 additions & 0 deletions b/‎KnowledgeBaseBot/build_knowledge_base.py‎
Lines changed: 130 additions & 0 deletions
diff --git a/‎KnowledgeBaseBot/kb_state.json‎
Lines changed: 1 addition & 0 deletions b/‎KnowledgeBaseBot/kb_state.json‎
Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@ Please answer the following questions for yourself before submitting an issue.
 - [ ] I checked to make sure that this issue has not already been filed
 - [ ] I'm reporting the issue to the correct repository (for multi-repository projects)
 - [ ] I have read and checked all configs (with all optional parts)
+- [ ] I asked [deepwiki](https://deepwiki.com/kevoreilly/CAPEv2) about my issue and found no solution
 
 
 # Expected Behavior
 
@@ -0,0 +1,40 @@
+name: Auto Answer Bot (using uv run)
+
+on:
+  issues:
+    types: [opened]
+
+# Least-privilege token: the job only checks out the code and comments on the issue.
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  answer:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository code
+        uses: actions/checkout@v4
+
+      - name: Set up Python with caching
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: uv run pip install -r requirements.txt
+
+      - name: Run the answer bot with uv run
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+          REPO_NAME: ${{ github.repository }}
+        # cd first: the script opens its index/metadata files by relative path.
+        run: cd KnowledgeBaseBot && uv run python auto_answer_bot.py
@@ -0,0 +1,40 @@
+name: Auto Answer Bot (using uv run)
+
+on:
+  issues:
+    types: [opened]
+
+# Least-privilege token: the job only checks out the code and comments on the issue.
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  answer:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository code
+        uses: actions/checkout@v4
+
+      - name: Set up Python with caching
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+
+      - name: Install the project
+        run: uv run pip install -r requirements.txt
+
+      - name: Run the answer bot with uv run
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+          REPO_NAME: ${{ github.repository }}
+        # cd first: the script opens its index/metadata files by relative path.
+        run: cd KnowledgeBaseBot && uv run python auto_answer_bot.py
@@ -0,0 +1,138 @@
+"""Post an automated first answer on a newly opened GitHub issue.
+
+The script embeds the issue text, retrieves the nearest entries from the
+FAISS knowledge base, asks Gemini to draft a reply from that context and
+posts the reply as an issue comment.
+
+Environment variables: GITHUB_TOKEN, REPO_NAME, ISSUE_NUMBER, GEMINI_API_KEY.
+"""
+import os
+import json
+import faiss
+from github import Github
+from sentence_transformers import SentenceTransformer
+import google.generativeai as genai
+
+# --- Configuration ---
+GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
+REPO_NAME = os.getenv('REPO_NAME')
+try:
+    ISSUE_NUMBER = int(os.getenv('ISSUE_NUMBER'))
+except (TypeError, ValueError):
+    # SystemExit with a string prints it to stderr and exits with status 1;
+    # unlike the interactive `exit` helper it does not depend on the site module.
+    raise SystemExit("Error: Invalid or missing ISSUE_NUMBER environment variable. Exiting.")
+MODEL_NAME = 'all-MiniLM-L6-v2'
+K_NEAREST_NEIGHBORS = 5 # Number of similar items to retrieve
+GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
+if not GEMINI_API_KEY:
+    raise SystemExit("Missed GEMINI api key")
+genai.configure(api_key=GEMINI_API_KEY)
+
+# --- Initialization ---
+g = Github(GITHUB_TOKEN)
+repo = g.get_repo(REPO_NAME)
+issue = repo.get_issue(number=ISSUE_NUMBER)
+llm_model = SentenceTransformer(MODEL_NAME)
+
+# --- Load the Unified Knowledge Base ---
+# Paths are relative: the script must be run from the folder holding these files.
+index = faiss.read_index("unified_index.faiss")
+with open("metadata.json", "rb") as f:
+    metadata = json.load(f)
+with open("all_texts.json", "rb") as f:
+    all_texts = json.load(f)
+# Entry i of metadata/all_texts must describe vector i of the index.
+if not (index.ntotal == len(metadata) == len(all_texts)):
+    print(
+        f"Warning: knowledge base is inconsistent (index={index.ntotal}, "
+        f"metadata={len(metadata)}, texts={len(all_texts)}); retrieved context may be wrong."
+    )
+
+# --- Process the New Issue ---
+# issue.body may be None (issue opened without a description); avoid a literal "None".
+new_issue_text = f"Title: {issue.title}\nBody: {issue.body or ''}"
+new_issue_embedding = llm_model.encode([new_issue_text]).astype('float32')
+
+# --- Semantic Search ---
+_, indices = index.search(new_issue_embedding, K_NEAREST_NEIGHBORS)
+context_pieces = []
+
+for i in indices[0]:
+    # FAISS fills missing neighbours with -1 when the index holds fewer than
+    # K vectors; as a list index that would silently pick the last entry.
+    if i < 0 or i >= len(metadata) or i >= len(all_texts):
+        continue
+    source_metadata = metadata[i]
+    source_text = all_texts[i]
+
+    if source_metadata['source'] == 'documentation':
+        context_pieces.append(f"--- Context from Documentation (file: {source_metadata['file']}) ---\n{source_text}")
+    elif source_metadata['source'] == 'issue':
+        context_pieces.append(f"--- Context from a Similar Issue ({source_metadata['url']}) ---\n{source_text}")
+
+context = "\n\n".join(context_pieces)
+
+# --- Generate Answer with LLM (Improved Prompt) ---
+# NOTE: the issue title/body are untrusted user input and are embedded verbatim in the prompt.
+prompt = f"""
+You are an expert GitHub Triage Assistant for an open-source project. Your primary goal is to ensure every issue is actionable for the developers. Your secondary goal is to answer questions using the provided context. Follow these steps in order:
+
+**Step 1: Triage the User's Issue Quality**
+First, analyze the "New Issue" content. Does it contain enough detail to be actionable?
+- **GOOD ISSUE:** It has a clear description, steps to reproduce, error messages, or a specific question.
+- **POOR ISSUE:** It's a short, vague question (e.g., "it doesn't work"), it's missing crucial details, or the user has deleted the issue template.
+
+**Step 2: Take Action Based on Triage**
+
+<if_issue_is_poor>
+- Gently inform the user that more information is needed for the community to help.
+- Explain *why* details are important (e.g., "to understand the context and reproduce the problem").
+- Provide a clear, actionable list of what's missing. Use the official issue template as a guide (e.g., "Please provide steps to reproduce, the version you are using, and any error logs.").
+- Politely remind them that this is a community-supported open-source project and that clear, detailed reports are the best way to get helpful and fast responses from volunteers.
+- **Do NOT attempt to answer the question.** Your only goal is to improve the quality of the issue report.
+</if_issue_is_poor>
+
+<if_issue_is_good>
+- Acknowledge their well-detailed report.
+- Now, analyze the "Relevant Context" provided (from documentation and past issues).
+- Generate a clear and helpful response based **strictly** on this context.
+- If the documentation provides an answer, summarize it and cite the source file.
+- If a past issue offers a solution, explain it and provide the URL to that issue.
+- If the context doesn't seem to fully resolve their detailed question, state that and mention a maintainer will look into it.
+- Conclude by thanking them for their contribution to the project.
+</if_issue_is_good>
+**New Issue:**
+{new_issue_text}
+
+**Relevant Context (from Documentation and Past Issues):**
+{context}
+
+**Suggested Answer (include links to sources if available):**
+"""
+# Gemini takes the system prompt through system_instruction
+system_instruction = "You are an expert GitHub support assistant. Your mission is to answer user issues based solely on official documentation and the history of resolved issues."
+
+# https://ai.google.dev/gemini-api/docs/models
+# Create the model with the system instruction
+generative_model = genai.GenerativeModel(
+    model_name="gemini-2.5-flash",
+    system_instruction=system_instruction
+)
+
+response = generative_model.generate_content(prompt)
+try:
+    answer_text = response.text
+except ValueError as err:
+    # `response.text` raises ValueError when the reply carries no text part
+    # (for example when generation was blocked); do not post an empty comment.
+    raise SystemExit(f"Gemini returned no usable answer, nothing was posted: {err}")
+
+# --- Post the Comment ---
+final_comment = f"Hello @{issue.user.login}, thanks for reaching out.\n\n"
+final_comment += answer_text
+final_comment += "\n\n---\n*This is an automated message generated from our documentation and issue history. If this doesn't solve your problem, someone will try to help you soon. Ensure that you checked other issues for the same issue!.* 🤖"
+
+issue.create_comment(final_comment)
+
+print(f"Enriched answer posted to issue #{ISSUE_NUMBER}")
@@ -0,0 +1,168 @@
+"""Build or incrementally update the FAISS knowledge base used by the answer bot.
+
+First build: documentation chunks and all repository issues are embedded.
+Later runs: only issues updated since the timestamp saved in the state file
+are fetched, and issues already stored (matched by URL) are skipped.
+"""
+import os
+import json
+import faiss
+import numpy as np
+from github import Auth
+from github import Github
+from sentence_transformers import SentenceTransformer
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import DirectoryLoader
+from datetime import datetime, timezone
+
+# --- Configuration ---
+GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
+REPO_NAME = "kevoreilly/CAPEv2"
+DOCS_PATH = "../docs" # Path to the folder with your documentation files (e.g., .md)
+MODEL_NAME = 'all-MiniLM-L6-v2' # An efficient embedding model
+
+# --- File Paths for State ---
+INDEX_FILE = "unified_index.faiss"
+METADATA_FILE = "metadata.json"
+TEXTS_FILE = "all_texts.json"
+STATE_FILE = "kb_state.json"
+
+# init pandoc
+# from pypandoc.pandoc_download import download_pandoc
+# download_pandoc()
+
+# --- Initialization ---
+if not GITHUB_TOKEN:
+    # Auth.Token() requires a non-empty string; fail with a readable message instead.
+    raise SystemExit("Error: missing GITHUB_TOKEN environment variable. Exiting.")
+auth = Auth.Token(GITHUB_TOKEN)
+g = Github(auth=auth)
+repo = g.get_repo(REPO_NAME)
+model = SentenceTransformer(MODEL_NAME)
+
+
+def _embed(texts):
+    """Return float32 embeddings for *texts* (the dtype the FAISS index is fed with)."""
+    return np.array(model.encode(texts, show_progress_bar=True)).astype('float32')
+
+
+def _as_utc(moment):
+    """Return *moment* as an aware UTC datetime; naive values are assumed to be UTC."""
+    if moment is None:
+        return None
+    if moment.tzinfo is None:
+        return moment.replace(tzinfo=timezone.utc)
+    return moment.astimezone(timezone.utc)
+
+
+# --- Load Existing Knowledge Base or Initialize a New One ---
+if os.path.exists(INDEX_FILE):
+    print("Loading existing knowledge base...")
+    index = faiss.read_index(INDEX_FILE)
+    with open(METADATA_FILE, "r") as f:
+        metadata = json.load(f)
+    with open(TEXTS_FILE, "r") as f:
+        all_texts = json.load(f)
+    with open(STATE_FILE, "r") as f:
+        last_update_time = _as_utc(datetime.fromisoformat(json.load(f)))
+    print(f"Knowledge base loaded. Last update was at: {last_update_time}")
+    # Vector i of the index must correspond to metadata[i] / all_texts[i].
+    if not (index.ntotal == len(metadata) == len(all_texts)):
+        print(
+            f"Warning: index holds {index.ntotal} vectors but there are {len(metadata)} metadata "
+            f"entries and {len(all_texts)} texts; delete {INDEX_FILE} and rebuild."
+        )
+    new_issues = repo.get_issues(state='all', since=last_update_time, sort='created', direction='asc')
+else:
+    print("No existing knowledge base found. Creating a new one.")
+    index = None
+    metadata = []
+    all_texts = []
+    # Set a very old date to fetch all issues for the first time
+    last_update_time = datetime(1970, 1, 1, tzinfo=timezone.utc)
+    new_issues = repo.get_issues(state='all', sort='updated', direction='asc')
+    # Initial processing of documentation (only on first build)
+    print("Processing documentation for the first time...")
+    loader = DirectoryLoader(DOCS_PATH, glob="**/*.rst")
+    docs = loader.load()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+    doc_chunks = text_splitter.split_documents(docs)
+    for chunk in doc_chunks:
+        all_texts.append(chunk.page_content)
+        metadata.append({'source': 'documentation', 'file': chunk.metadata.get('source', 'N/A')})
+    # The documentation must be embedded too: every entry of all_texts/metadata
+    # needs a vector at the same position in the index, otherwise search results
+    # are mapped to the wrong text and the documentation is never retrievable.
+    if all_texts:
+        doc_embeddings = _embed(all_texts)
+        index = faiss.IndexFlatL2(doc_embeddings.shape[1])
+        index.add(doc_embeddings)
+
+# --- Process GitHub Issues ---
+# --- Fetch New Issues from GitHub ---
+print(f"Fetching issues updated since {last_update_time.isoformat()}...")
+# The 'since' parameter fetches issues updated on or after the given time
+# be aware since might not work
+
+new_issue_texts = []
+new_issue_metadata = []
+latest_issue_time = last_update_time
+existing_issue_urls = {m['url'] for m in metadata if m.get('source') == 'issue'}
+
+for issue in new_issues:
+    # Remember the newest update time seen so the next run can resume from it
+    # instead of fetching every issue again.
+    issue_updated_at = _as_utc(issue.updated_at)
+    if issue_updated_at is not None and issue_updated_at > latest_issue_time:
+        latest_issue_time = issue_updated_at
+
+    # Simple logic to avoid adding duplicates. For a robust system, you might check IDs.
+    issue_url = issue.html_url
+    if issue_url in existing_issue_urls:
+        print(f"Skipping issue #{issue.number} as it might be a duplicate or minor update.")
+        continue
+    existing_issue_urls.add(issue_url)
+
+    print(f"Processing new/updated issue #{issue.number}")
+    # body may be None for an issue/comment without text; avoid a literal "None".
+    text_parts = [f"Title: {issue.title}\nBody: {issue.body or ''}"]
+    text_parts.extend(f"Comment: {comment.body or ''}" for comment in issue.get_comments())
+
+    new_issue_texts.append("\n".join(text_parts))
+    new_issue_metadata.append({'source': 'issue', 'number': issue.number, 'url': issue_url})
+
+# --- Add New Issues to the Knowledge Base ---
+if new_issue_texts:
+    print(f"Found {len(new_issue_texts)} new/updated issues to add.")
+
+    # Generate embeddings for new issues only
+    new_embeddings = _embed(new_issue_texts)
+
+    # If the index is new, create it
+    if index is None:
+        index = faiss.IndexFlatL2(new_embeddings.shape[1])
+
+    # Add new embeddings to the index and update metadata lists
+    index.add(new_embeddings)
+    all_texts.extend(new_issue_texts)
+    metadata.extend(new_issue_metadata)
+
+    print("Knowledge base updated.")
+else:
+    print("No new issues found. Knowledge base is already up-to-date.")
+
+# --- Save the Updated Knowledge Base and State ---
+if index is None:
+    # Nothing was embedded (no documentation chunks and no issues): there is no index to write.
+    raise SystemExit("Nothing to save: no documentation chunks or issues were found.")
+print("Saving knowledge base and state...")
+faiss.write_index(index, INDEX_FILE)
+with open(METADATA_FILE, "w") as f:
+    json.dump(metadata, f, indent=2)
+with open(TEXTS_FILE, "w") as f:
+    json.dump(all_texts, f, indent=2)
+# Save the timestamp of the latest issue we processed for the next run
+with open(STATE_FILE, "w") as f:
+    json.dump(latest_issue_time.isoformat(), f)
+
+print("Process complete!")
@@ -0,0 +1 @@
+"1970-01-01T00:00:00+00:00"