22 changes: 18 additions & 4 deletions evaluator.py
@@ -1,7 +1,12 @@
 from typing import Dict, List, Optional, Tuple, Any
 from pydantic import BaseModel, Field, field_validator
 from models import JSONResume, EvaluationData
-from llm_utils import initialize_llm_provider, extract_json_from_response
+from llm_utils import (
+    initialize_llm_provider,
+    extract_json_from_response,
+    parse_llm_response,
+    supports_structured_output,
+)
 import logging
 import json
 import re
@@ -78,12 +83,21 @@ def evaluate_resume(self, resume_text: str) -> EvaluationData:
             response = self.provider.chat(**chat_params, **kwargs)
 
             response_text = response["message"]["content"]
-            response_text = extract_json_from_response(response_text)
-            logger.error(f"🔤 Prompt response: {response_text}")
+            logger.info(f"🔤 Prompt response: {response_text}")
 
-            evaluation_dict = json.loads(response_text)
+            # Check if we used structured output
+            used_structured_output = supports_structured_output(
+                self.provider, self.model_name
+            )
+            evaluation_dict = parse_llm_response(
+                response_text, structured_output=used_structured_output
+            )
             evaluation_data = EvaluationData(**evaluation_dict)
 
+            logger.info(
+                f"✅ Successfully parsed evaluation (structured_output: {used_structured_output})"
+            )
+
             return evaluation_data
 
         except Exception as e:
14 changes: 10 additions & 4 deletions github.py
@@ -9,7 +9,12 @@
 from pdf import logger
 from prompts.template_manager import TemplateManager
 from prompt import DEFAULT_MODEL, MODEL_PARAMETERS
-from llm_utils import initialize_llm_provider, extract_json_from_response
+from llm_utils import (
+    initialize_llm_provider,
+    extract_json_from_response,
+    parse_llm_response,
+    supports_structured_output,
+)
 from config import DEVELOPMENT_MODE
 
 
@@ -340,9 +345,10 @@ def generate_projects_json(projects: List[Dict]) -> List[Dict]:

     try:
         response_text = response_text.strip()
-        response_text = extract_json_from_response(response_text)
-
-        selected_projects = json.loads(response_text)
+        # GitHub project selection doesn't use structured output yet (no schema defined)
+        selected_projects = parse_llm_response(
+            response_text, structured_output=False
+        )
 
         unique_projects = []
         seen_names = set()
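The hunk above truncates right after the `unique_projects`/`seen_names` initializers. For context, a minimal sketch of the name-based de-duplication those initializers imply — the loop body is an assumption for illustration, not code from this PR:

# Hypothetical continuation (assumed): keep the first occurrence of each
# project name the LLM returns.
selected_projects = [
    {"name": "resume-parser", "stars": 42},
    {"name": "resume-parser", "stars": 42},  # duplicate the LLM might emit
    {"name": "pdf-tools", "stars": 7},
]

unique_projects = []
seen_names = set()
for project in selected_projects:
    name = project.get("name")
    if name and name not in seen_names:
        seen_names.add(name)
        unique_projects.append(project)

assert [p["name"] for p in unique_projects] == ["resume-parser", "pdf-tools"]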
59 changes: 58 additions & 1 deletion llm_utils.py
@@ -2,8 +2,9 @@
 Utility functions for LLM providers.
 """
 
+import json
 import logging
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 from models import ModelProvider, OllamaProvider, GeminiProvider
 from prompt import MODEL_PROVIDER_MAPPING, GEMINI_API_KEY
 
@@ -60,3 +61,59 @@ def initialize_llm_provider(model_name: str) -> Any:
     else:
         logger.info(f"🔄 Using Ollama provider with model {model_name}")
         return provider
+
+
+def parse_llm_response(
+    response_text: str, structured_output: bool = False
+) -> Union[Dict, Any]:
+    """
+    Parse LLM response, with special handling for structured output.
+
+    Args:
+        response_text: Raw response text from LLM
+        structured_output: Whether this response came from structured output
+
+    Returns:
+        Parsed data structure
+
+    Raises:
+        Exception: If parsing fails
+    """
+    if structured_output:
+        # For structured output, the response should already be valid JSON
+        try:
+            return json.loads(response_text)
+        except json.JSONDecodeError as e:
+            logger.warning(
+                f"Structured output JSON parsing failed: {e}. Trying cleanup..."
+            )
+            # Fallback to cleanup and parse
+            cleaned_text = extract_json_from_response(response_text)
+            return json.loads(cleaned_text)
+    else:
+        # Regular parsing with cleanup
+        try:
+            cleaned_text = extract_json_from_response(response_text)
+            return json.loads(cleaned_text)
+        except Exception as json_error:
+            logger.error(f"JSON parsing failed: {json_error}")
+            raise json_error
+
+
+def supports_structured_output(provider: Any, model: str) -> bool:
+    """
+    Check if the provider and model support structured output.
+
+    Args:
+        provider: LLM provider instance
+        model: Model name
+
+    Returns:
+        True if structured output is supported
+    """
+    # Only Gemini supports structured output for now
+    if isinstance(provider, GeminiProvider):
+        return hasattr(provider, "use_new_api") and provider.use_new_api
+
+    # Ollama doesn't use structured output (keep traditional JSON parsing)
+    return False
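A quick usage sketch of the new `parse_llm_response` helper (the inputs are illustrative, and this assumes `extract_json_from_response` strips markdown fences, as its use here suggests):

from llm_utils import parse_llm_response

# Structured output: the text is already valid JSON and is parsed directly.
structured = '{"total": 12.5, "breakdown": "certs +5, OSS +7.5"}'
assert parse_llm_response(structured, structured_output=True)["total"] == 12.5

# Unstructured output: cleaned up first, then parsed.
unstructured = '```json\n{"total": 12.5, "breakdown": "certs +5, OSS +7.5"}\n```'
assert parse_llm_response(unstructured, structured_output=False)["total"] == 12.5

Callers pair this with `supports_structured_output(provider, model)` to decide which branch to take, as the evaluator.py and pdf.py hunks show.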
81 changes: 65 additions & 16 deletions models.py
@@ -19,7 +19,7 @@ def chat(
         model: str,
         messages: List[Dict[str, str]],
         options: Dict[str, Any] = None,
-        **kwargs
+        **kwargs,
     ) -> Dict[str, Any]:
         """Send a chat request to the LLM provider."""
         ...
@@ -216,9 +216,9 @@ class JSONResume(BaseModel):


 class CategoryScore(BaseModel):
-    score: float = Field(ge=0, description="Score achieved in this category")
-    max: int = Field(gt=0, description="Maximum possible score")
-    evidence: str = Field(min_length=1, description="Evidence supporting the score")
+    score: float = Field(description="Score achieved in this category")
+    max: int = Field(description="Maximum possible score")
+    evidence: str = Field(description="Evidence supporting the score")
 
 
 class Scores(BaseModel):
@@ -229,14 +229,13 @@ class Scores(BaseModel):


 class BonusPoints(BaseModel):
-    total: float = Field(ge=0, le=20, description="Total bonus points")
+    total: float = Field(description="Total bonus points")
     breakdown: str = Field(description="Breakdown of bonus points")
 
 
 class Deductions(BaseModel):
     total: float = Field(
-        ge=0,
-        description="Total deduction points (stored as positive, applied as negative)",
+        description="Total deduction points (stored as positive, applied as negative)"
     )
     reasons: str = Field(description="Reasons for deductions")
 
@@ -245,8 +244,10 @@ class EvaluationData(BaseModel):
     scores: Scores
     bonus_points: BonusPoints
     deductions: Deductions
-    key_strengths: List[str] = Field(min_items=1, max_items=5)
-    areas_for_improvement: List[str] = Field(min_items=1, max_items=5)
+    key_strengths: List[str] = Field(description="Key strengths (1-5 items)")
+    areas_for_improvement: List[str] = Field(
+        description="Areas for improvement (1-3 items)"
+    )
 
 
 class GitHubProfile(BaseModel):
@@ -281,7 +282,7 @@ def chat(
         model: str,
         messages: List[Dict[str, str]],
         options: Dict[str, Any] = None,
-        **kwargs
+        **kwargs,
     ) -> Dict[str, Any]:
         """Send a chat request to Ollama."""
 
@@ -304,8 +305,9 @@ def chat(
if "stream" in kwargs:
chat_params["stream"] = kwargs["stream"]

if "format" in kwargs:
chat_params["format"] = kwargs["format"]
# Note: Ollama format parameter disabled for now - using traditional JSON parsing
# if "format" in kwargs:
# chat_params["format"] = kwargs["format"]

return self.client.chat(**chat_params)

@@ -314,20 +316,67 @@ class GeminiProvider:
"""Google Gemini API provider implementation."""

def __init__(self, api_key: str):
import google.generativeai as genai
self.api_key = api_key
from google import genai

genai.configure(api_key=api_key)
self.client = genai
self.client = genai.Client(api_key=api_key)

def chat(
self,
model: str,
messages: List[Dict[str, str]],
options: Dict[str, Any] = None,
**kwargs
**kwargs,
) -> Dict[str, Any]:
"""Send a chat request to Google Gemini API."""
return self._chat_new_api(model, messages, options, **kwargs)

def _chat_new_api(
self,
model: str,
messages: List[Dict[str, str]],
options: Dict[str, Any] = None,
**kwargs,
) -> Dict[str, Any]:
"""Use new Gemini API with structured output support."""
# Combine all messages into a single content string
combined_content = ""
for msg in messages:
if msg["role"] == "system":
combined_content += f"System: {msg['content']}\n\n"
elif msg["role"] == "user":
combined_content += f"User: {msg['content']}\n\n"

# Prepare config
config = {}

# Map options to Gemini parameters
if options:
if "temperature" in options:
config["temperature"] = options["temperature"]
if "top_p" in options:
config["top_p"] = options["top_p"]

# Handle structured output for Gemini
if "format" in kwargs and kwargs["format"]:
config["response_mime_type"] = "application/json"
config["response_schema"] = kwargs["format"]

# Send the chat request using new Gemini API
response = self.client.models.generate_content(
model=model, contents=combined_content.strip(), config=config
)

return {"message": {"role": "assistant", "content": response.text}}

def _chat_old_api(
self,
model: str,
messages: List[Dict[str, str]],
options: Dict[str, Any] = None,
**kwargs,
) -> Dict[str, Any]:
"""(Deprecated) Send a chat request to Google Gemini API.""" # Map options to Gemini parameters
generation_config = {}
if options:
if "temperature" in options:
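End to end, a hedged sketch of a structured-output call through the new provider. The API key and model name are placeholders, and the `format` schema is illustrative; this assumes the `google-genai` client honors the `response_mime_type`/`response_schema` config keys exactly as the hunk above uses them:

from models import GeminiProvider

provider = GeminiProvider(api_key="YOUR_API_KEY")  # placeholder key
response = provider.chat(
    model="gemini-2.0-flash",  # placeholder model name
    messages=[
        {"role": "system", "content": "You are a resume evaluator. Reply in JSON."},
        {"role": "user", "content": "Score this resume: ..."},
    ],
    options={"temperature": 0.1, "top_p": 0.9},
    # Any truthy `format` value switches the request into JSON mode:
    format={"type": "object", "properties": {"total": {"type": "number"}}},
)
print(response["message"]["content"])  # JSON text; parse with structured_output=True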
31 changes: 21 additions & 10 deletions pdf.py
@@ -20,7 +20,12 @@
     ProjectsSection,
     AwardsSection,
 )
-from llm_utils import initialize_llm_provider, extract_json_from_response
+from llm_utils import (
+    initialize_llm_provider,
+    extract_json_from_response,
+    parse_llm_response,
+    supports_structured_output,
+)
 from pymupdf_rag import to_markdown
 from typing import List, Optional, Dict, Any
 from prompt import (
@@ -109,13 +114,17 @@ def _call_llm_for_section(
response_text = response["message"]["content"]

try:
response_text = extract_json_from_response(response_text)
json_start = response_text.find("{")
json_end = response_text.rfind("}")
if json_start != -1 and json_end != -1:
response_text = response_text[json_start : json_end + 1]
parsed_data = json.loads(response_text)
logger.debug(f"✅ Successfully extracted {section_name} section")
# Check if we used structured output
used_structured_output = (
return_model is not None
and supports_structured_output(self.provider, DEFAULT_MODEL)
)
parsed_data = parse_llm_response(
response_text, structured_output=used_structured_output
)
logger.debug(
f"✅ Successfully extracted {section_name} section (structured_output: {used_structured_output})"
)

transformed_data = transform_parsed_data(parsed_data)
end_time = time.time()
@@ -125,8 +134,10 @@ def _call_llm_for_section(
             )
 
             return transformed_data
-        except json.JSONDecodeError as e:
-            logger.error(f"❌ Error parsing JSON for {section_name} section: {e}")
+        except Exception as e:
+            logger.error(
+                f"❌ Error parsing response for {section_name} section: {e}"
+            )
             logger.error(f"Raw response: {response_text}")
             return None
 
1 change: 1 addition & 0 deletions requirements.txt
@@ -5,5 +5,6 @@ requests==2.32.4
 pymupdf4llm==0.0.27
 Jinja2==3.1.6
 google-generativeai==0.4.0
+google-genai>=0.3.0
 python-dotenv==1.0.1
 black==25.9.0