Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
from typing import Any, Dict, Optional
from models import ModelProvider, OllamaProvider, GeminiProvider
from prompt import MODEL_PROVIDER_MAPPING, GEMINI_API_KEY
from prompt import MODEL_PROVIDER_MAPPING, GEMINI_API_KEY, LLM_PROVIDER

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -39,24 +39,22 @@ def extract_json_from_response(response_text: str) -> str:

def initialize_llm_provider(model_name: str) -> Any:
"""
Initialize the appropriate LLM provider based on the model name.
Initialize the appropriate LLM provider based on the LLM_PROVIDER environment variable.

Args:
model_name: The name of the model to use

Returns:
An initialized LLM provider (either OllamaProvider or GeminiProvider)
"""
# Default to Ollama provider
provider = OllamaProvider()
# If using Gemini and API key is available, use Gemini provider
model_provider = MODEL_PROVIDER_MAPPING.get(model_name, ModelProvider.OLLAMA)
if model_provider == ModelProvider.GEMINI:
# Use the environment variable to determine the provider
if LLM_PROVIDER == ModelProvider.GEMINI.value:
if not GEMINI_API_KEY:
logger.warning("⚠️ Gemini API key not found. Falling back to Ollama.")
else:
logger.info(f"🔄 Using Google Gemini API provider with model {model_name}")
provider = GeminiProvider(api_key=GEMINI_API_KEY)
else:
logger.info(f"🔄 Using Ollama provider with model {model_name}")
return provider
logger.error("❌ Gemini provider selected, but GEMINI_API_KEY is not set.")
raise ValueError("GEMINI_API_KEY not found for Gemini provider")
logger.info(f"🔄 Using Google Gemini API provider with model {model_name}")
return GeminiProvider(api_key=GEMINI_API_KEY)

# Default to Ollama for any other case
logger.info(f"🔄 Using Ollama provider with model {model_name}")
return OllamaProvider()
28 changes: 18 additions & 10 deletions models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import List, Optional, Dict, Tuple, Any, Protocol, runtime_checkable
from pydantic import BaseModel, Field, field_validator
from enum import Enum
from vertexai.generative_models import Part


class ModelProvider(Enum):
Expand Down Expand Up @@ -101,6 +102,14 @@ class Award(BaseModel):
summary: Optional[str] = None


class Scholarship(BaseModel):
"""Scholarship and Mentorship information for JSON Resume format."""
title: Optional[str] = None
date: Optional[str] = None
awarder: Optional[str] = None
summary: Optional[str] = None


class Certificate(BaseModel):
"""Certificate information for JSON Resume format."""

Expand Down Expand Up @@ -198,6 +207,11 @@ class AwardsSection(BaseModel):
awards: Optional[List[Award]] = None


class ScholarshipsSection(BaseModel):
"""Scholarships section containing a list of scholarships."""
scholarships: Optional[List[Scholarship]] = None


class JSONResume(BaseModel):
"""Complete JSON Resume format model."""

Expand All @@ -206,6 +220,7 @@ class JSONResume(BaseModel):
volunteer: Optional[List[Volunteer]] = None
education: Optional[List[Education]] = None
awards: Optional[List[Award]] = None
scholarships: Optional[List[Scholarship]] = None
certificates: Optional[List[Certificate]] = None
publications: Optional[List[Publication]] = None
skills: Optional[List[Skill]] = None
Expand All @@ -226,6 +241,7 @@ class Scores(BaseModel):
self_projects: CategoryScore
production: CategoryScore
technical_skills: CategoryScore
scholarship_mentorship: CategoryScore


class BonusPoints(BaseModel):
Expand Down Expand Up @@ -320,34 +336,26 @@ def __init__(self, api_key: str):
self.client = genai

def chat(
self,
model: str,
messages: List[Dict[str, str]],
options: Dict[str, Any] = None,
**kwargs
self, model: str, messages: List[Dict[str, str]], options: Dict[str, Any] = None, **kwargs
) -> Dict[str, Any]:
"""Send a chat request to Google Gemini API."""
# Map options to Gemini parameters
generation_config = {}
if options:
if "temperature" in options:
generation_config["temperature"] = options["temperature"]
if "top_p" in options:
generation_config["top_p"] = options["top_p"]

# Create a Gemini model
gemini_model = self.client.GenerativeModel(
model_name=model, generation_config=generation_config
)

# Convert messages to Gemini format
gemini_messages = []
for msg in messages:
role = "user" if msg["role"] == "user" else "model"
gemini_messages.append({"role": role, "parts": [msg["content"]]})

# Send the chat request
response = gemini_model.generate_content(gemini_messages)

# Convert Gemini response to Ollama-like format for compatibility
return {"message": {"role": "assistant", "content": response.text}}

18 changes: 17 additions & 1 deletion pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
Skill,
Project,
Award,
Scholarship,
BasicsSection,
WorkSection,
EducationSection,
SkillsSection,
ProjectsSection,
AwardsSection,
ScholarshipsSection,
)
from llm_utils import initialize_llm_provider, extract_json_from_response
from pymupdf_rag import to_markdown
Expand Down Expand Up @@ -189,6 +191,18 @@ def extract_awards_section(self, resume_text: str) -> Optional[Dict]:
logger.error("❌ Failed to render awards template")
return None
return self._call_llm_for_section("awards", resume_text, prompt, AwardsSection)

def extract_scholarships_section(self, resume_text: str) -> Optional[Dict]:
"""Extracts the scholarships section from the resume text."""
prompt = self.template_manager.render_template(
"scholarships", text_content=resume_text
)
if not prompt:
logger.error("❌ Failed to render scholarships template")
return None
return self._call_llm_for_section(
"scholarships", resume_text, prompt, ScholarshipsSection
)

def extract_json_from_text(self, resume_text: str) -> Optional[JSONResume]:
try:
Expand Down Expand Up @@ -227,6 +241,7 @@ def _extract_section_data(
"skills": self.extract_skills_section,
"projects": self.extract_projects_section,
"awards": self.extract_awards_section,
"scholarships": self.extract_scholarships_section,
}

if section_name not in section_extractors:
Expand Down Expand Up @@ -269,14 +284,15 @@ def _extract_all_sections_separately(
) -> Optional[JSONResume]:
start_time = time.time()

sections = ["basics", "work", "education", "skills", "projects", "awards"]
sections = ["basics", "work", "education", "skills", "projects", "awards", "scholarships"]

complete_resume = {
"basics": None,
"work": None,
"volunteer": None,
"education": None,
"awards": None,
"scholarships": None,
"certificates": None,
"publications": None,
"skills": None,
Expand Down
6 changes: 3 additions & 3 deletions prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@

# Get model and provider from environment or use defaults
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", DEFAULT_MODEL_NAME)
PROVIDER = os.getenv("LLM_PROVIDER", DEFAULT_PROVIDER.value)
LLM_PROVIDER = os.getenv("LLM_PROVIDER", DEFAULT_PROVIDER.value)

# Validate provider
if PROVIDER not in [p.value for p in ModelProvider]:
PROVIDER = DEFAULT_PROVIDER.value
if LLM_PROVIDER not in [p.value for p in ModelProvider]:
LLM_PROVIDER = DEFAULT_PROVIDER.value

# Model-specific parameters
MODEL_PARAMETERS = {
Expand Down
1 change: 1 addition & 0 deletions prompts/template_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def _load_templates(self):
"skills": "skills.jinja",
"projects": "projects.jinja",
"awards": "awards.jinja",
"scholarships": "scholarships.jinja",
"system_message": "system_message.jinja",
"github_project_selection": "github_project_selection.jinja",
"resume_evaluation_criteria": "resume_evaluation_criteria.jinja",
Expand Down
19 changes: 18 additions & 1 deletion prompts/templates/resume_evaluation_criteria.jinja
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
You are evaluating a resume for a Software Intern position at HackerRank. Analyze the resume data and provide scores based on these criteria:

**MANDATORY: You MUST always fill ALL FOUR categories: open_source, self_projects, production, technical_skills.**
**MANDATORY: You MUST always fill ALL FIVE categories: open_source, self_projects, production, technical_skills, scholarship_mentorship.**

## CRITICAL FAIRNESS REQUIREMENTS
**SCORES MUST NEVER DEPEND ON:**
Expand Down Expand Up @@ -86,6 +86,21 @@ You are evaluating a resume for a Software Intern position at HackerRank. Analyz
- Analyze the 'work' and 'volunteer' sections for real-world, internship, or production experience
- **SPECIAL CONSIDERATION**: Give extra points for founder roles, co-founder positions, or early-stage engineer roles (first 10-20 employees) at startups

### Scholarship & Mentorship (0-10 points)
**HIGH SCORES (7-10 points):**
- Prestigious, well-known scholarships (e.g., national/international academic awards).
- Recognized mentorship roles in formal programs (e.g., community or technical mentoring).

**MEDIUM SCORES (4-6 points):**
- University-level scholarships or academic honors.
- Significant achievements in academic competitions.

**LOW SCORES (1-3 points):**
- Minor awards or honorable mentions.

**ZERO SCORES (0 points):**
- No scholarships, mentorships, or relevant awards listed.

### Technical Skills (0-10 points)
- Analyze the 'skills', 'languages', and evidence of technical breadth or problem-solving in projects, work, or competitions

Expand Down Expand Up @@ -160,6 +175,7 @@ You are evaluating a resume for a Software Intern position at HackerRank. Analyz
- self_projects: 0-30 points (maximum 30)
- production: 0-25 points (maximum 25)
- technical_skills: 0-10 points (maximum 10)
- scholarship_mentorship: 0-10 points (maximum 10)
- Bonus points total must be <= 20 (maximum 20 points)
- **OVERALL SCORE LIMIT**: The total score (categories + bonus - deductions) cannot exceed 120 points

Expand All @@ -173,6 +189,7 @@ Analyze the following resume and provide a JSON response with this EXACT structu
"self_projects": {"score": 0, "max": 30, "evidence": "string"},
"production": {"score": 0, "max": 25, "evidence": "string"},
"technical_skills": {"score": 0, "max": 10, "evidence": "string"}
"scholarship_mentorship": {"score": 0, "max": 10, "evidence": "string"}
},
"bonus_points": {"total": 0, "breakdown": "string"},
"deductions": {"total": 0, "reasons": "string"},
Expand Down
40 changes: 40 additions & 0 deletions prompts/templates/scholarships.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
You are an expert data extractor. Your task is to extract all scholarships and mentorships from the provided resume markdown.

The items may appear as a single block of text or as separate bullet points. Identify each distinct scholarship or mentorship role. If no scholarships or mentorships are found, return an empty list.

Follow this example precisely.

--- EXAMPLE ---
INPUT TEXT:
**Some Other Section**
...
**Scholarships & Mentorships**
**University Merit Award**, for academic excellence (2023).
**Mentor at Coding Club**, guided students (2024).
**Another Section**
...

OUTPUT JSON:
{
"scholarships": [
{
"title": "University Merit Award",
"date": "2023",
"awarder": null,
"summary": "For academic excellence"
},
{
"title": "Mentor at Coding Club",
"date": "2024",
"awarder": null,
"summary": "Guided students"
}
]
}
--- END EXAMPLE ---

--- ACTUAL TASK ---
INPUT TEXT:
{{ text_content }}

OUTPUT JSON:
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ pydantic==2.11.7
requests==2.32.4
pymupdf4llm==0.0.27
Jinja2==3.1.6
google-generativeai==0.4.0
google-cloud-aiplatform
python-dotenv==1.0.1
black==25.9.0
12 changes: 12 additions & 0 deletions score.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def print_evaluation_results(
"self_projects": 30,
"production": 25,
"technical_skills": 10,
"scholarship_mentorship": 10,
}

# Open Source
Expand Down Expand Up @@ -122,6 +123,17 @@ def print_evaluation_results(
print(f" Evidence: {tech_score.evidence}")
print()

# Scholarship & Mentorship
if (
hasattr(evaluation.scores, "scholarship_mentorship")
and evaluation.scores.scholarship_mentorship
):
sm_score = evaluation.scores.scholarship_mentorship
capped_score = min(sm_score.score, category_maxes["scholarship_mentorship"])
print(f"🎓 Scholarship & Mentorship: {capped_score}/{sm_score.max}")
print(f" Evidence: {sm_score.evidence}")
print()

# Bonus Points
if hasattr(evaluation, "bonus_points") and evaluation.bonus_points:
print(f"\n⭐ BONUS POINTS: {evaluation.bonus_points.total}")
Expand Down
3 changes: 3 additions & 0 deletions transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def transform_parsed_data(parsed_data: Dict) -> Dict:
),
)
),
"scholarships": transform_achievements(
parsed_data.get("scholarships", [])
),
"certificates": parsed_data.get("certificates", []),
"publications": parsed_data.get("publications", []),
"skills": transform_skills_comprehensive(parsed_data),
Expand Down