Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
from typing import Any, Dict, Optional
from models import ModelProvider, OllamaProvider, GeminiProvider
from prompt import MODEL_PROVIDER_MAPPING, GEMINI_API_KEY
from prompt import MODEL_PROVIDER_MAPPING, GEMINI_API_KEY, LLM_PROVIDER

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -39,24 +39,22 @@ def extract_json_from_response(response_text: str) -> str:

def initialize_llm_provider(model_name: str) -> Any:
"""
Initialize the appropriate LLM provider based on the model name.
Initialize the appropriate LLM provider based on the LLM_PROVIDER environment variable.

Args:
model_name: The name of the model to use

Returns:
An initialized LLM provider (either OllamaProvider or GeminiProvider)
"""
# Default to Ollama provider
provider = OllamaProvider()
# If using Gemini and API key is available, use Gemini provider
model_provider = MODEL_PROVIDER_MAPPING.get(model_name, ModelProvider.OLLAMA)
if model_provider == ModelProvider.GEMINI:
# Use the environment variable to determine the provider
if LLM_PROVIDER == ModelProvider.GEMINI.value:
if not GEMINI_API_KEY:
logger.warning("⚠️ Gemini API key not found. Falling back to Ollama.")
else:
logger.info(f"🔄 Using Google Gemini API provider with model {model_name}")
provider = GeminiProvider(api_key=GEMINI_API_KEY)
else:
logger.info(f"🔄 Using Ollama provider with model {model_name}")
return provider
logger.error("❌ Gemini provider selected, but GEMINI_API_KEY is not set.")
raise ValueError("GEMINI_API_KEY not found for Gemini provider")
logger.info(f"🔄 Using Google Gemini API provider with model {model_name}")
return GeminiProvider(api_key=GEMINI_API_KEY)

# Default to Ollama for any other case
logger.info(f"🔄 Using Ollama provider with model {model_name}")
return OllamaProvider()
28 changes: 18 additions & 10 deletions models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import List, Optional, Dict, Tuple, Any, Protocol, runtime_checkable
from pydantic import BaseModel, Field, field_validator
from enum import Enum
from vertexai.generative_models import Part


class ModelProvider(Enum):
Expand Down Expand Up @@ -101,6 +102,14 @@ class Award(BaseModel):
summary: Optional[str] = None


class Scholarship(BaseModel):
"""Scholarship and Mentorship information for JSON Resume format."""
title: Optional[str] = None
date: Optional[str] = None
awarder: Optional[str] = None
summary: Optional[str] = None


class Certificate(BaseModel):
"""Certificate information for JSON Resume format."""

Expand Down Expand Up @@ -198,6 +207,11 @@ class AwardsSection(BaseModel):
awards: Optional[List[Award]] = None


class ScholarshipsSection(BaseModel):
"""Scholarships section containing a list of scholarships."""
scholarships: Optional[List[Scholarship]] = None


class JSONResume(BaseModel):
"""Complete JSON Resume format model."""

Expand All @@ -206,6 +220,7 @@ class JSONResume(BaseModel):
volunteer: Optional[List[Volunteer]] = None
education: Optional[List[Education]] = None
awards: Optional[List[Award]] = None
scholarships: Optional[List[Scholarship]] = None
certificates: Optional[List[Certificate]] = None
publications: Optional[List[Publication]] = None
skills: Optional[List[Skill]] = None
Expand All @@ -226,6 +241,7 @@ class Scores(BaseModel):
self_projects: CategoryScore
production: CategoryScore
technical_skills: CategoryScore
scholarship_mentorship: CategoryScore


class BonusPoints(BaseModel):
Expand Down Expand Up @@ -320,34 +336,26 @@ def __init__(self, api_key: str):
self.client = genai

def chat(
self,
model: str,
messages: List[Dict[str, str]],
options: Dict[str, Any] = None,
**kwargs
self, model: str, messages: List[Dict[str, str]], options: Dict[str, Any] = None, **kwargs
) -> Dict[str, Any]:
"""Send a chat request to Google Gemini API."""
# Map options to Gemini parameters
generation_config = {}
if options:
if "temperature" in options:
generation_config["temperature"] = options["temperature"]
if "top_p" in options:
generation_config["top_p"] = options["top_p"]

# Create a Gemini model
gemini_model = self.client.GenerativeModel(
model_name=model, generation_config=generation_config
)

# Convert messages to Gemini format
gemini_messages = []
for msg in messages:
role = "user" if msg["role"] == "user" else "model"
gemini_messages.append({"role": role, "parts": [msg["content"]]})

# Send the chat request
response = gemini_model.generate_content(gemini_messages)

# Convert Gemini response to Ollama-like format for compatibility
return {"message": {"role": "assistant", "content": response.text}}

18 changes: 17 additions & 1 deletion pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
Skill,
Project,
Award,
Scholarship,
BasicsSection,
WorkSection,
EducationSection,
SkillsSection,
ProjectsSection,
AwardsSection,
ScholarshipsSection,
)
from llm_utils import initialize_llm_provider, extract_json_from_response
from pymupdf_rag import to_markdown
Expand Down Expand Up @@ -189,6 +191,18 @@ def extract_awards_section(self, resume_text: str) -> Optional[Dict]:
logger.error("❌ Failed to render awards template")
return None
return self._call_llm_for_section("awards", resume_text, prompt, AwardsSection)

def extract_scholarships_section(self, resume_text: str) -> Optional[Dict]:
"""Extracts the scholarships section from the resume text."""
prompt = self.template_manager.render_template(
"scholarships", text_content=resume_text
)
if not prompt:
logger.error("❌ Failed to render scholarships template")
return None
return self._call_llm_for_section(
"scholarships", resume_text, prompt, ScholarshipsSection
)

def extract_json_from_text(self, resume_text: str) -> Optional[JSONResume]:
try:
Expand Down Expand Up @@ -227,6 +241,7 @@ def _extract_section_data(
"skills": self.extract_skills_section,
"projects": self.extract_projects_section,
"awards": self.extract_awards_section,
"scholarships": self.extract_scholarships_section,
}

if section_name not in section_extractors:
Expand Down Expand Up @@ -269,14 +284,15 @@ def _extract_all_sections_separately(
) -> Optional[JSONResume]:
start_time = time.time()

sections = ["basics", "work", "education", "skills", "projects", "awards"]
sections = ["basics", "work", "education", "skills", "projects", "awards", "scholarships"]

complete_resume = {
"basics": None,
"work": None,
"volunteer": None,
"education": None,
"awards": None,
"scholarships": None,
"certificates": None,
"publications": None,
"skills": None,
Expand Down
6 changes: 3 additions & 3 deletions prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@

# Get model and provider from environment or use defaults
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", DEFAULT_MODEL_NAME)
PROVIDER = os.getenv("LLM_PROVIDER", DEFAULT_PROVIDER.value)
LLM_PROVIDER = os.getenv("LLM_PROVIDER", DEFAULT_PROVIDER.value)

# Validate provider
if PROVIDER not in [p.value for p in ModelProvider]:
PROVIDER = DEFAULT_PROVIDER.value
if LLM_PROVIDER not in [p.value for p in ModelProvider]:
LLM_PROVIDER = DEFAULT_PROVIDER.value

# Model-specific parameters
MODEL_PARAMETERS = {
Expand Down
1 change: 1 addition & 0 deletions prompts/template_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def _load_templates(self):
"skills": "skills.jinja",
"projects": "projects.jinja",
"awards": "awards.jinja",
"scholarships": "scholarships.jinja",
"system_message": "system_message.jinja",
"github_project_selection": "github_project_selection.jinja",
"resume_evaluation_criteria": "resume_evaluation_criteria.jinja",
Expand Down
19 changes: 18 additions & 1 deletion prompts/templates/resume_evaluation_criteria.jinja
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
You are evaluating a resume for a Software Intern position at HackerRank. Analyze the resume data and provide scores based on these criteria:

**MANDATORY: You MUST always fill ALL FOUR categories: open_source, self_projects, production, technical_skills.**
**MANDATORY: You MUST always fill ALL FIVE categories: open_source, self_projects, production, technical_skills, scholarship_mentorship.**

## CRITICAL FAIRNESS REQUIREMENTS
**SCORES MUST NEVER DEPEND ON:**
Expand Down Expand Up @@ -86,6 +86,21 @@ You are evaluating a resume for a Software Intern position at HackerRank. Analyz
- Analyze the 'work' and 'volunteer' sections for real-world, internship, or production experience
- **SPECIAL CONSIDERATION**: Give extra points for founder roles, co-founder positions, or early-stage engineer roles (first 10-20 employees) at startups

### Scholarship & Mentorship (0-10 points)
**HIGH SCORES (7-10 points):**
- Prestigious, well-known scholarships (e.g., national/international academic awards).
- Recognized mentorship roles in formal programs (e.g., community or technical mentoring).

**MEDIUM SCORES (4-6 points):**
- University-level scholarships or academic honors.
- Significant achievements in academic competitions.

**LOW SCORES (1-3 points):**
- Minor awards or honorable mentions.

**ZERO SCORES (0 points):**
- No scholarships, mentorships, or relevant awards listed.

### Technical Skills (0-10 points)
- Analyze the 'skills', 'languages', and evidence of technical breadth or problem-solving in projects, work, or competitions

Expand Down Expand Up @@ -160,6 +175,7 @@ You are evaluating a resume for a Software Intern position at HackerRank. Analyz
- self_projects: 0-30 points (maximum 30)
- production: 0-25 points (maximum 25)
- technical_skills: 0-10 points (maximum 10)
- scholarship_mentorship: 0-10 points (maximum 10)
- Bonus points total must be <= 20 (maximum 20 points)
- **OVERALL SCORE LIMIT**: The total score (categories + bonus - deductions) cannot exceed 120 points

Expand All @@ -173,6 +189,7 @@ Analyze the following resume and provide a JSON response with this EXACT structu
"self_projects": {"score": 0, "max": 30, "evidence": "string"},
"production": {"score": 0, "max": 25, "evidence": "string"},
"technical_skills": {"score": 0, "max": 10, "evidence": "string"}
"scholarship_mentorship": {"score": 0, "max": 10, "evidence": "string"}
},
"bonus_points": {"total": 0, "breakdown": "string"},
"deductions": {"total": 0, "reasons": "string"},
Expand Down
40 changes: 40 additions & 0 deletions prompts/templates/scholarships.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
You are an expert data extractor. Your task is to extract all scholarships and mentorships from the provided resume markdown.

The items may appear as a single block of text or as separate bullet points. Identify each distinct scholarship or mentorship role. If no scholarships or mentorships are found, return an empty list.

Follow this example precisely.

--- EXAMPLE ---
INPUT TEXT:
**Some Other Section**
...
**Scholarships & Mentorships**
**University Merit Award**, for academic excellence (2023).
**Mentor at Coding Club**, guided students (2024).
**Another Section**
...

OUTPUT JSON:
{
"scholarships": [
{
"title": "University Merit Award",
"date": "2023",
"awarder": null,
"summary": "For academic excellence"
},
{
"title": "Mentor at Coding Club",
"date": "2024",
"awarder": null,
"summary": "Guided students"
}
]
}
--- END EXAMPLE ---

--- ACTUAL TASK ---
INPUT TEXT:
{{ text_content }}

OUTPUT JSON:
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ pydantic==2.11.7
requests==2.32.4
pymupdf4llm==0.0.27
Jinja2==3.1.6
google-generativeai==0.4.0
google-cloud-aiplatform
python-dotenv==1.0.1
black==25.9.0
12 changes: 12 additions & 0 deletions score.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def print_evaluation_results(
"self_projects": 30,
"production": 25,
"technical_skills": 10,
"scholarship_mentorship": 10,
}

# Open Source
Expand Down Expand Up @@ -122,6 +123,17 @@ def print_evaluation_results(
print(f" Evidence: {tech_score.evidence}")
print()

# Scholarship & Mentorship
if (
hasattr(evaluation.scores, "scholarship_mentorship")
and evaluation.scores.scholarship_mentorship
):
sm_score = evaluation.scores.scholarship_mentorship
capped_score = min(sm_score.score, category_maxes["scholarship_mentorship"])
print(f"🎓 Scholarship & Mentorship: {capped_score}/{sm_score.max}")
print(f" Evidence: {sm_score.evidence}")
print()

# Bonus Points
if hasattr(evaluation, "bonus_points") and evaluation.bonus_points:
print(f"\n⭐ BONUS POINTS: {evaluation.bonus_points.total}")
Expand Down
3 changes: 3 additions & 0 deletions transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def transform_parsed_data(parsed_data: Dict) -> Dict:
),
)
),
"scholarships": transform_achievements(
parsed_data.get("scholarships", [])
),
"certificates": parsed_data.get("certificates", []),
"publications": parsed_data.get("publications", []),
"skills": transform_skills_comprehensive(parsed_data),
Expand Down