# Patch summary (reviewer notes). Text before the first "diff --git" line is
# not part of any hunk and is skipped by `git apply` / `patch`.
#
# github.py
#   * Adds determine_enhanced_project_type(): classifies a repository as
#     "open_source", "fork_contribution" or "self_project" from its fork flag,
#     contributor count, stars/forks/topics, the user's contribution count and
#     the primary language.
#   * fetch_repo_contributors() now returns the contributors payload (and []
#     on a non-200 status or on any exception) instead of an integer count.
#   * fetch_all_github_repos() calls the new classifier and reports errors
#     from `status_code` / `data` rather than from a `response` object.
#   * Adds fetch_user_pull_requests(): three GitHub issue-search queries
#     (merged, open, closed-and-unmerged) with per_page=100 and no pagination,
#     so at most the first 100 results of each query are collected.
#   * Adds analyze_open_source_contributions(), calculate_open_source_score()
#     (result capped at 100) and assess_contribution_quality().
#   * fetch_and_display_github_info() adds an "open_source_analysis" key to
#     its result.
# prompts/templates/*.jinja
#   * Lowers the "popular project" threshold from 1000+ to 500+ stars and adds
#     rules that refer to the open_source_analysis data.
# score.py
#   * print_evaluation_results() accepts an optional github_data argument and
#     prints the pull-request metrics; main() passes github_data through.
# transform.py
#   * convert_github_data_to_text() appends an open-source analysis section.
#
# Review observations on the patch content (not changed here):
#   * analyze_open_source_contributions() reads pr.get("labels", []), but the
#     PR dicts built by fetch_user_pull_requests() have no "labels" key, so
#     "pr_labels" is always [].
#   * transform.py reads contrib.get('is_major_project', False), but no code
#     in this patch sets "is_major_project", so that line always prints False.
#
# Reconstruction note: this patch was recovered from a copy whose line breaks
# and indentation had been collapsed. Added (+) and removed (-) lines follow
# the recovered text; the position of blank context lines, the indentation of
# context lines, and runs of spaces inside string literals are a best-effort
# reconstruction -- verify against the repository before applying. Emoji that
# were mis-encoded in that copy have been repaired (the globe in
# "External PRs" is a best guess).
diff --git a/github.py b/github.py
index 202412b..93a8a70 100644
--- a/github.py
+++ b/github.py
@@ -142,22 +142,72 @@ def fetch_contributions_count(owner: str, contributors_data):
     return user_contributions, total_contributions
 
 
-def fetch_repo_contributors(owner: str, repo_name: str) -> int:
+def determine_enhanced_project_type(
+    repo: Dict, contributor_count: int, user_contributions: int, username: str
+) -> str:
+    """
+    Enhanced project type determination that better detects open source projects.
+    """
+    # Check if it's a fork with significant contributions
+    if repo.get("fork", False):
+        if user_contributions > 10:
+            return "open_source"
+        elif user_contributions > 0:
+            return "fork_contribution"
+
+    # Check for multiple contributors (classic open source indicator)
+    if contributor_count > 1:
+        return "open_source"
+
+    # Check for community engagement indicators
+    stars = repo.get("stargazers_count", 0)
+    forks = repo.get("forks_count", 0)
+    topics = repo.get("topics", [])
+
+    # If repository has community engagement, it's likely open source
+    if stars > 10 or forks > 5 or len(topics) > 0:
+        return "open_source"
+
+    # Check if it's a well-maintained project with good activity
+    if (
+        user_contributions > 20
+        and repo.get("updated_at")
+        and not repo.get("archived", False)
+    ):
+        return "open_source"
+
+    # Check for popular languages that indicate serious projects
+    language = repo.get("language", "")
+    serious_languages = [
+        "Python",
+        "JavaScript",
+        "TypeScript",
+        "Java",
+        "C++",
+        "Go",
+        "Rust",
+        "C#",
+    ]
+    if language in serious_languages and user_contributions > 5 and stars > 2:
+        return "open_source"
+
+    return "self_project"
+
+
+def fetch_repo_contributors(owner: str, repo_name: str) -> List[Dict]:
+    """Fetch repository contributors data."""
     try:
         api_url = f"https://api.github.com/repos/{owner}/{repo_name}/contributors"
-
 
         status_code, contributors_data = _fetch_github_api(api_url)
-        return contributors_data
-
 
         if status_code == 200:
-            return len(contributors_data)
+            return contributors_data
         else:
-            return 1
+            return []
     except Exception as e:
         logger.error(f"Error fetching contributors for {owner}/{repo_name}: {e}")
-        return 1
+        return []
 
 
 def fetch_all_github_repos(github_url: str, max_repos: int = 100) -> List[Dict]:
@@ -188,8 +238,8 @@ def fetch_all_github_repos(github_url: str, max_repos: int = 100) -> List[Dict]:
                     username, contributors_data
                 )
 
-                project_type = (
-                    "open_source" if contributor_count > 1 else "self_project"
+                project_type = determine_enhanced_project_type(
+                    repo, contributor_count, user_contributions, username
                 )
 
                 project = {
@@ -237,11 +287,11 @@ def fetch_all_github_repos(github_url: str, max_repos: int = 100) -> List[Dict]:
             )
 
             return projects
-        elif response.status_code == 404:
+        elif status_code == 404:
             print(f"GitHub user not found: {username}")
             return []
         else:
-            print(f"GitHub API error: {response.status_code} - {response.text}")
+            print(f"GitHub API error: {status_code} - {data}")
             return []
 
     except requests.exceptions.RequestException as e:
@@ -333,7 +383,6 @@ def generate_projects_json(projects: List[Dict]) -> List[Dict]:
             "options": model_params,
         }
 
-        # Call the LLM provider
         response = provider.chat(**chat_params)
         response_text = response["message"]["content"]
 
@@ -402,6 +451,251 @@ def generate_projects_json(projects: List[Dict]) -> List[Dict]:
     return projects_data
 
 
+def fetch_user_pull_requests(username: str, state: str = "all") -> List[Dict]:
+    """
+    Fetch all pull requests created by the user with accurate merged status.
+    This includes PRs to repositories they don't own (true open source contributions).
+    """
+    try:
+        all_prs = []
+
+        # Fetch merged PRs separately for accurate status
+        if state in ["all", "merged"]:
+            merged_params = {
+                "q": f"author:{username} type:pr is:merged",
+                "sort": "created",
+                "order": "desc",
+                "per_page": 100,
+            }
+            status_code, merged_data = _fetch_github_api(
+                "https://api.github.com/search/issues", params=merged_params
+            )
+            if status_code == 200:
+                for item in merged_data.get("items", []):
+                    all_prs.append(
+                        {
+                            "title": item.get("title", ""),
+                            "url": item.get("html_url", ""),
+                            "state": "merged",
+                            "created_at": item.get("created_at", ""),
+                            "updated_at": item.get("updated_at", ""),
+                            "repository": item.get("repository_url", "").replace(
+                                "https://api.github.com/repos/", ""
+                            ),
+                            "number": item.get("number", 0),
+                            "merged": True,
+                            "draft": False,
+                            "is_own_repo": item.get("repository_url", "")
+                            .replace("https://api.github.com/repos/", "")
+                            .startswith(f"{username}/"),
+                        }
+                    )
+
+        # Fetch open PRs
+        if state in ["all", "open"]:
+            open_params = {
+                "q": f"author:{username} type:pr is:open",
+                "sort": "created",
+                "order": "desc",
+                "per_page": 100,
+            }
+            status_code, open_data = _fetch_github_api(
+                "https://api.github.com/search/issues", params=open_params
+            )
+            if status_code == 200:
+                for item in open_data.get("items", []):
+                    all_prs.append(
+                        {
+                            "title": item.get("title", ""),
+                            "url": item.get("html_url", ""),
+                            "state": "open",
+                            "created_at": item.get("created_at", ""),
+                            "updated_at": item.get("updated_at", ""),
+                            "repository": item.get("repository_url", "").replace(
+                                "https://api.github.com/repos/", ""
+                            ),
+                            "number": item.get("number", 0),
+                            "merged": False,
+                            "draft": item.get("draft", False),
+                            "is_own_repo": item.get("repository_url", "")
+                            .replace("https://api.github.com/repos/", "")
+                            .startswith(f"{username}/"),
+                        }
+                    )
+
+        # Fetch closed (unmerged) PRs
+        if state in ["all", "closed"]:
+            closed_params = {
+                "q": f"author:{username} type:pr is:closed is:unmerged",
+                "sort": "created",
+                "order": "desc",
+                "per_page": 100,
+            }
+            status_code, closed_data = _fetch_github_api(
+                "https://api.github.com/search/issues", params=closed_params
+            )
+            if status_code == 200:
+                for item in closed_data.get("items", []):
+                    all_prs.append(
+                        {
+                            "title": item.get("title", ""),
+                            "url": item.get("html_url", ""),
+                            "state": "closed",
+                            "created_at": item.get("created_at", ""),
+                            "updated_at": item.get("updated_at", ""),
+                            "repository": item.get("repository_url", "").replace(
+                                "https://api.github.com/repos/", ""
+                            ),
+                            "number": item.get("number", 0),
+                            "merged": False,
+                            "draft": False,
+                            "is_own_repo": item.get("repository_url", "")
+                            .replace("https://api.github.com/repos/", "")
+                            .startswith(f"{username}/"),
+                        }
+                    )
+
+        print(f"✅ Found {len(all_prs)} pull requests for {username}")
+        return all_prs
+
+    except Exception as e:
+        print(f"❌ Error fetching pull requests: {e}")
+        return []
+
+
+def analyze_open_source_contributions(username: str) -> Dict:
+    """
+    Analyze open source contributions by fetching PRs and analyzing them.
+    """
+    try:
+        print(f"🔍 Analyzing open source contributions for {username}...")
+
+        # Fetch all PRs created by the user
+        all_prs = fetch_user_pull_requests(username)
+
+        # Categorize PRs
+        own_repo_prs = [pr for pr in all_prs if pr.get("is_own_repo", False)]
+        external_prs = [pr for pr in all_prs if not pr.get("is_own_repo", False)]
+        merged_prs = [pr for pr in all_prs if pr.get("merged", False)]
+
+        # Analyze external contributions (true open source)
+        external_contributions = []
+        for pr in external_prs:
+            repo_name = pr.get("repository", "")
+            if repo_name:
+                repo_api_url = f"https://api.github.com/repos/{repo_name}"
+                status_code, repo_data = _fetch_github_api(repo_api_url)
+
+                if status_code == 200:
+                    contribution = {
+                        "repository": repo_name,
+                        "repository_stars": repo_data.get("stargazers_count", 0),
+                        "repository_forks": repo_data.get("forks_count", 0),
+                        "repository_language": repo_data.get("language", ""),
+                        "repository_description": repo_data.get("description", ""),
+                        "repository_topics": repo_data.get("topics", []),
+                        "pr_title": pr.get("title", ""),
+                        "pr_url": pr.get("url", ""),
+                        "pr_state": pr.get("state", ""),
+                        "pr_merged": pr.get("merged", False),
+                        "pr_created_at": pr.get("created_at", ""),
+                        "pr_labels": pr.get("labels", []),
+                        "is_popular_project": repo_data.get("stargazers_count", 0)
+                        >= 500,
+                    }
+                    external_contributions.append(contribution)
+
+        # Calculate metrics
+        total_external_prs = len(external_prs)
+        merged_external_prs = len(
+            [pr for pr in external_prs if pr.get("merged", False)]
+        )
+        popular_project_contributions = len(
+            [c for c in external_contributions if c.get("is_popular_project", False)]
+        )
+
+        analysis = {
+            "total_prs": len(all_prs),
+            "own_repo_prs": len(own_repo_prs),
+            "external_prs": total_external_prs,
+            "merged_prs": len(merged_prs),
+            "merged_external_prs": merged_external_prs,
+            "popular_project_contributions": popular_project_contributions,
+            "external_contributions": external_contributions,
+            "open_source_score": calculate_open_source_score(external_contributions),
+            "contribution_quality": assess_contribution_quality(external_contributions),
+        }
+
+        return analysis
+
+    except Exception as e:
+        print(f"❌ Error analyzing open source contributions: {e}")
+        return {
+            "total_prs": 0,
+            "own_repo_prs": 0,
+            "external_prs": 0,
+            "merged_prs": 0,
+            "merged_external_prs": 0,
+            "popular_project_contributions": 0,
+            "external_contributions": [],
+            "open_source_score": 0,
+            "contribution_quality": "No contributions",
+        }
+
+
+def calculate_open_source_score(contributions: List[Dict]) -> int:
+    """Calculate open source score based on contributions."""
+    if not contributions:
+        return 0
+
+    score = 0
+
+    for contribution in contributions:
+        # Base score for any external contribution
+        score += 5
+
+        # Bonus for merged PRs
+        if contribution.get("pr_merged", False):
+            score += 10
+
+        # Bonus for popular projects (500+ stars)
+        if contribution.get("is_popular_project", False):
+            score += 15
+
+        # Bonus for multiple contributions to same project
+        repo_name = contribution.get("repository", "")
+        if repo_name:
+            same_repo_count = len(
+                [c for c in contributions if c.get("repository") == repo_name]
+            )
+            if same_repo_count > 1:
+                score += same_repo_count * 5
+
+    return min(score, 100)  # Cap at 100
+
+
+def assess_contribution_quality(contributions: List[Dict]) -> str:
+    """Assess the quality of open source contributions."""
+    if not contributions:
+        return "No open source contributions"
+
+    merged_count = len([c for c in contributions if c.get("pr_merged", False)])
+    popular_count = len(
+        [c for c in contributions if c.get("is_popular_project", False)]
+    )
+
+    if popular_count > 2:
+        return "Excellent - multiple contributions to popular projects"
+    elif popular_count > 0:
+        return "Good - contributions to popular projects"
+    elif merged_count > 2:
+        return "Good - multiple merged contributions"
+    elif merged_count > 0:
+        return "Fair - some merged contributions"
+    else:
+        return "Basic - contributions present but not merged"
+
+
 def fetch_and_display_github_info(github_url: str) -> Dict:
     logger.info(f"{github_url}")
     github_profile = fetch_github_profile(github_url)
@@ -415,6 +709,13 @@ def fetch_and_display_github_info(github_url: str) -> Dict:
     if not projects:
         print("\n❌ No repositories found or failed to fetch repository details.")
 
+    # Get username for PR analysis
+    username = extract_github_username(github_url)
+    open_source_analysis = {}
+    if username:
+        print("🔍 Analyzing open source contributions...")
+        open_source_analysis = analyze_open_source_contributions(username)
+
     profile_json = generate_profile_json(github_profile)
     projects_json = generate_projects_json(projects)
 
@@ -422,6 +723,7 @@ def fetch_and_display_github_info(github_url: str) -> Dict:
         "profile": profile_json,
         "projects": projects_json,
         "total_projects": len(projects_json),
+        "open_source_analysis": open_source_analysis,
     }
 
     return result
diff --git a/prompts/templates/github_project_selection.jinja b/prompts/templates/github_project_selection.jinja
index d25799e..361f3e5 100644
--- a/prompts/templates/github_project_selection.jinja
+++ b/prompts/templates/github_project_selection.jinja
@@ -5,7 +5,7 @@ You are an expert technical recruiter analyzing GitHub repositories to identify
 Given a list of GitHub repositories, select the TOP 7 most impressive projects that would be most relevant for evaluating a candidate's technical skills and experience.
 
 **IMPORTANT: Contributions to Popular Open Source Projects**
-- **HIGH PRIORITY**: Contributions to well-known, popular open source projects (1000+ stars) are extremely valuable, even if the contribution is small
+- **HIGH PRIORITY**: Contributions to well-known, popular open source projects (500+ stars) are extremely valuable, even if the contribution is small
 - Popular projects include: React, Vue, Angular, Node.js, Express, Django, Flask, TensorFlow, PyTorch, Kubernetes, Docker, VS Code, etc.
 - A small contribution to a popular project (bug fix, documentation, feature) is often more impressive than a complete personal project
 - Look for repositories that are forks of popular projects where the candidate has made meaningful contributions
@@ -13,7 +13,7 @@ Given a list of GitHub repositories, select the TOP 7 most impressive projects t
 
 **Selection Criteria (in order of importance):**
 1. **Author Contribution Level**: Projects where the candidate has made significant contributions (high author_commit_count) - HIGHEST PRIORITY
-2. **Popular Open Source Contributions**: Contributions to well-known projects (1000+ stars) - HIGH PRIORITY
+2. **Popular Open Source Contributions**: Contributions to well-known projects (500+ stars) - HIGH PRIORITY
 3. **Technical Complexity**: Projects that demonstrate advanced programming concepts, architecture, or problem-solving
 4. **Real-world Impact**: Projects with actual users, deployments, or practical applications
 5. **Code Quality**: Well-documented, maintained, and professional code
diff --git a/prompts/templates/resume_evaluation_criteria.jinja b/prompts/templates/resume_evaluation_criteria.jinja
index 45c0daf..0278470 100644
--- a/prompts/templates/resume_evaluation_criteria.jinja
+++ b/prompts/templates/resume_evaluation_criteria.jinja
@@ -33,15 +33,17 @@ You are evaluating a resume for a Software Intern position at HackerRank. Analyz
 
 ### Open Source (0-35 points)
 **HIGH SCORES (25-35 points):**
-- Contributions to popular open source projects (1000+ stars)
+- Contributions to popular open source projects (500+ stars)
 - Significant contributions to well-known projects
 - Google Summer of Code (GSoC) participation
 - Substantial community involvement
+- External PRs to repositories they don't own (true open source contributions)
 
 **MEDIUM SCORES (15-24 points):**
 - Contributions to smaller open source projects
 - Active GitHub presence with meaningful contributions to other repositories
 - Participation in open source programs
+- Some external PRs merged to popular projects
 
 **LOW SCORES (5-10 points):**
 - Only personal GitHub repositories with no contributions to other projects
@@ -57,7 +59,10 @@ You are evaluating a resume for a Software Intern position at HackerRank. Analyz
 **CRITICAL RULES:**
 - Having personal GitHub repositories does NOT constitute open source contribution
 - True open source contribution means contributing to OTHER people's projects
-- When GitHub data shows all projects are 'self_project' type, open source score MUST be 10 points or less
+- Use open_source_analysis data to determine actual external contributions
+- Prioritize external PRs over personal repository activity
+- When GitHub data shows all projects are 'self_project' type AND no external PRs, open source score MUST be 10 points or less
+- If open_source_analysis shows external_prs > 0, this indicates true open source involvement
 
 ### Self Projects (0-30 points)
 **HIGH SCORES (20-30 points):**
diff --git a/score.py b/score.py
index f69a3d0..ef25fde 100644
--- a/score.py
+++ b/score.py
@@ -27,7 +27,9 @@
 
 
 def print_evaluation_results(
-    evaluation: EvaluationData, candidate_name: str = "Candidate"
+    evaluation: EvaluationData,
+    candidate_name: str = "Candidate",
+    github_data: dict = None,
 ):
     """Print evaluation results in a readable format."""
     print("\n" + "=" * 80)
@@ -90,6 +92,50 @@ def print_evaluation_results(
         capped_score = min(os_score.score, category_maxes["open_source"])
         print(f"🌐 Open Source: {capped_score}/{os_score.max}")
         print(f" Evidence: {os_score.evidence}")
+
+        if github_data and "open_source_analysis" in github_data:
+            analysis = github_data["open_source_analysis"]
+            if analysis.get("total_prs", 0) > 0:
+                print(f"\n 📊 DETAILED OPEN SOURCE METRICS:")
+                print(f" 📝 Total PRs: {analysis.get('total_prs', 0)}")
+                print(f" 🏠 Own Repo PRs: {analysis.get('own_repo_prs', 0)}")
+                print(f" 🌍 External PRs: {analysis.get('external_prs', 0)}")
+
+                external_prs = analysis.get("external_prs", 0)
+                merged_external = analysis.get("merged_external_prs", 0)
+                if external_prs > 0:
+                    merge_rate = (merged_external / external_prs) * 100
+                    print(
+                        f" ✅ Merged External: {merged_external}/{external_prs} ({merge_rate:.1f}%)"
+                    )
+
+                print(
+                    f" 🌟 Popular Projects (500+ stars): {analysis.get('popular_project_contributions', 0)}"
+                )
+                print(
+                    f" 💯 Analysis Score: {analysis.get('open_source_score', 0)}/100"
+                )
+                print(
+                    f" 📈 Quality: {analysis.get('contribution_quality', 'N/A')}"
+                )
+
+                # Show top 3 external contributions
+                external_contributions = analysis.get("external_contributions", [])
+                if external_contributions:
+                    print(f"\n 🏆 Top External Contributions:")
+                    unique_repos = {}
+                    for contrib in external_contributions:
+                        repo = contrib.get("repository", "")
+                        if repo and repo not in unique_repos:
+                            unique_repos[repo] = contrib.get("repository_stars", 0)
+
+                    # Sort by stars and show top 3
+                    top_repos = sorted(
+                        unique_repos.items(), key=lambda x: x[1], reverse=True
+                    )[:3]
+                    for i, (repo, stars) in enumerate(top_repos, 1):
+                        print(f" {i}. {repo} ({stars:,} ⭐)")
+
         print()
 
     # Self Projects
@@ -266,7 +312,7 @@ def main(pdf_path):
     candidate_name = resume_data.basics.name
 
     # Print evaluation results in readable format
-    print_evaluation_results(score, candidate_name)
+    print_evaluation_results(score, candidate_name, github_data)
 
     if DEVELOPMENT_MODE:
         csv_row = transform_evaluation_response(
diff --git a/transform.py b/transform.py
index 25eab1d..9309753 100644
--- a/transform.py
+++ b/transform.py
@@ -918,6 +918,42 @@ def convert_github_data_to_text(github_data: dict) -> str:
             github_text += f" Language: {details.get('language', 'N/A')}\n"
             github_text += "\n"
 
+    if "open_source_analysis" in github_data:
+        analysis = github_data["open_source_analysis"]
+        github_text += f"\n=== OPEN SOURCE CONTRIBUTIONS ANALYSIS ===\n"
+        github_text += f"Total Pull Requests: {analysis.get('total_prs', 0)}\n"
+        github_text += (
+            f"External PRs (to other repos): {analysis.get('external_prs', 0)}\n"
+        )
+        github_text += (
+            f"Merged External PRs: {analysis.get('merged_external_prs', 0)}\n"
+        )
+        github_text += f"Popular Project Contributions: {analysis.get('popular_project_contributions', 0)}\n"
+        github_text += (
+            f"Open Source Score: {analysis.get('open_source_score', 0)}/100\n"
+        )
+        github_text += (
+            f"Contribution Quality: {analysis.get('contribution_quality', 'N/A')}\n"
+        )
+
+        # Add detailed external contributions
+        external_contributions = analysis.get("external_contributions", [])
+        if external_contributions:
+            github_text += f"\nExternal Contributions Details:\n"
+            for i, contrib in enumerate(external_contributions[:5], 1):
+                github_text += f"{i}. Repository: {contrib.get('repository', 'N/A')}\n"
+                github_text += f" Stars: {contrib.get('repository_stars', 'N/A')}\n"
+                github_text += f" PR Title: {contrib.get('pr_title', 'N/A')}\n"
+                github_text += f" PR State: {contrib.get('pr_state', 'N/A')}\n"
+                github_text += f" Merged: {contrib.get('pr_merged', False)}\n"
+                github_text += (
+                    f" Popular Project: {contrib.get('is_popular_project', False)}\n"
+                )
+                github_text += (
+                    f" Major Project: {contrib.get('is_major_project', False)}\n"
+                )
+                github_text += "\n"
+
     return github_text
 
 