Skip to content

Commit 4628b70

Browse files
committed
Collect GitHub PoCs and add a test
Signed-off-by: ziad hany <[email protected]>
1 parent ab99939 commit 4628b70

File tree

7 files changed

+550
-0
lines changed

7 files changed

+550
-0
lines changed

vulnerabilities/improvers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
computer_package_version_rank as compute_version_rank_v2,
2626
)
2727
from vulnerabilities.pipelines.v2_improvers import enhance_with_exploitdb as exploitdb_v2
28+
from vulnerabilities.pipelines.v2_improvers import enhance_with_github_poc
2829
from vulnerabilities.pipelines.v2_improvers import enhance_with_kev as enhance_with_kev_v2
2930
from vulnerabilities.pipelines.v2_improvers import (
3031
enhance_with_metasploit as enhance_with_metasploit_v2,
@@ -70,5 +71,6 @@
7071
compute_advisory_todo_v2.ComputeToDo,
7172
unfurl_version_range_v2.UnfurlVersionRangePipeline,
7273
compute_advisory_todo.ComputeToDo,
74+
enhance_with_github_poc.GithubPocsImproverPipeline,
7375
]
7476
)
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
from pathlib import Path
12+
13+
import dateparser
14+
from aboutcode.pipeline import LoopProgress
15+
from fetchcode.vcs import fetch_via_vcs
16+
17+
from vulnerabilities.models import AdvisoryAlias
18+
from vulnerabilities.models import AdvisoryExploit
19+
from vulnerabilities.pipelines import VulnerableCodePipeline
20+
21+
22+
class GithubPocsImproverPipeline(VulnerableCodePipeline):
    """
    Improver pipeline that records GitHub proof-of-concept repositories as exploits.

    The pipeline clones ``repo_url``, reads every JSON file found in the clone,
    looks up the file stem (for example ``CVE-2022-0236``) as an ``AdvisoryAlias``
    and creates or updates one ``AdvisoryExploit`` per related advisory and PoC
    repository URL. The clone is removed at the end of the run or on failure.
    """

    pipeline_id = "enhance_with_github_poc"
    repo_url = "https://github.com/nomi-sec/PoC-in-GitHub"

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: clone, import, clean up."""
        return (
            cls.clone_repo,
            cls.collect_and_store_exploits,
            cls.clean_downloads,
        )

    def clone_repo(self):
        """Clone ``repo_url`` and keep the VCS response for the later steps."""
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def collect_and_store_exploits(self):
        """
        Parse PoC JSON files, match them to advisories via aliases,
        and create or update related exploit records.

        Files that are not valid JSON, whose stem matches no alias, or whose
        content is not a list are logged and skipped instead of aborting the run.
        """
        base_directory = Path(self.vcs_response.dest_dir)
        # rglob() is already recursive; a leading "**/" in the pattern is redundant.
        json_files = list(base_directory.rglob("*.json"))
        files_count = len(json_files)
        self.log(f"Enhancing the vulnerability with {files_count:,d} exploit records")
        progress = LoopProgress(total_iterations=files_count, logger=self.log)

        # Number of exploit records actually created or updated, for the final log.
        stored_count = 0

        for file_path in progress.iter(json_files):
            try:
                # Explicit encoding: do not depend on the locale of the host.
                with open(file_path, encoding="utf-8") as f:
                    exploits_data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                self.log(f"Invalid JSON in {file_path}, skipping.")
                continue

            filename = file_path.stem.strip()
            try:
                alias = AdvisoryAlias.objects.get(alias=filename)
            except AdvisoryAlias.DoesNotExist:
                self.log(f"Advisory {filename} not found.")
                continue

            if not isinstance(exploits_data, list):
                # A valid JSON document that is not a list of repository entries
                # cannot be processed below.
                self.log(f"Unexpected JSON structure in {file_path}, skipping.")
                continue

            # Evaluate the related advisories once per file.
            advisories = list(alias.advisories.all())

            for advisory in advisories:
                for exploit_data in exploits_data:
                    if not isinstance(exploit_data, dict):
                        continue

                    exploit_repo_url = exploit_data.get("html_url")
                    if not exploit_repo_url:
                        continue

                    # dateparser.parse() does not accept None, so only parse
                    # when the entry carries a creation date.
                    # NOTE(review): assumes source_date_published accepts None - confirm.
                    created_at = exploit_data.get("created_at")
                    date_published = dateparser.parse(created_at) if created_at else None

                    AdvisoryExploit.objects.update_or_create(
                        advisory=advisory,
                        data_source="GitHub-PoC",
                        source_url=exploit_repo_url,
                        defaults={
                            "description": exploit_data.get("description"),
                            "source_date_published": date_published,
                        },
                    )
                    stored_count += 1

        self.log(f"Successfully added {stored_count:,d} exploit advisory")

    def clean_downloads(self):
        """Delete the cloned repository, if a clone was made."""
        # clone_repo() may have failed before setting the attribute; on_failure()
        # must not raise AttributeError in that case.
        vcs_response = getattr(self, "vcs_response", None)
        if vcs_response:
            self.log("Removing cloned repository")
            vcs_response.delete()

    def on_failure(self):
        """Remove the clone when any pipeline step fails."""
        self.clean_downloads()
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from github import Github
11+
from vulnerabilities.models import AdvisoryAlias, AdvisoryExploit, AdvisoryV2
12+
from vulnerabilities.pipelines import VulnerableCodePipeline
13+
from vulnerablecode.settings import env
14+
15+
# GitHub API token, read through the project settings' `env` helper when this
# module is imported; it is passed to the PyGithub client created below.
# NOTE(review): presumably raises at import time if GITHUB_TOKEN is unset - confirm.
GITHUB_TOKEN = env.str("GITHUB_TOKEN")
16+
17+
class GitHubPocImproverPipeline(VulnerableCodePipeline):
    """
    Pipeline to collect GitHub PoCs for vulnerabilities.

    For every ``AdvisoryAlias`` starting with ``CVE``, search GitHub repositories
    mentioning that alias and store one ``AdvisoryExploit`` per related advisory
    and repository.
    """

    pipeline_id = "collect_poc"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # PyGithub client authenticated with the module-level token.
        self.github = Github(login_or_token=GITHUB_TOKEN)

    @classmethod
    def steps(cls):
        """Return the single step of this pipeline."""
        return (cls.collect_and_store_poc_results,)

    def search_github_pocs(self, cve_id):
        """Search for PoCs on GitHub for each CVE"""
        self.log(f"Searching GitHub for PoCs for {cve_id}")

        query = f'"{cve_id}" PoC OR exploit OR "proof of concept"'
        return self.github.search_repositories(query)

    def collect_and_store_poc_results(self):
        """Store PoC results in the database"""
        self.log("Storing PoC results in database...")
        for advisory_alias in reversed(AdvisoryAlias.objects.filter(alias__startswith="CVE")):
            # Evaluate the related advisories once per alias, and do not spend a
            # GitHub search request on an alias nothing can be attached to.
            advisories = list(advisory_alias.advisories.all())
            if not advisories:
                continue

            repositories = self.search_github_pocs(advisory_alias.alias)
            if not repositories:
                continue

            for repository in repositories:
                for advisory in advisories:
                    # The repository URL is part of the lookup so that each PoC
                    # repository gets its own record; without it every result
                    # overwrote the single (advisory, data_source) row.
                    AdvisoryExploit.objects.update_or_create(
                        advisory=advisory,
                        data_source="GitHub POC",
                        source_url=repository.html_url,
                        defaults={
                            "description": repository.description,
                            "notes": str(repository),
                            "platform": "github",
                        },
                    )
                self.log(f"Stored PoC repository {repository} for {advisory_alias.alias}")
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import os
11+
from datetime import datetime
12+
from unittest import mock
13+
from unittest.mock import MagicMock
14+
15+
import pytest
16+
17+
from vulnerabilities.models import AdvisoryAlias
18+
from vulnerabilities.models import AdvisoryExploit
19+
from vulnerabilities.models import AdvisoryV2
20+
from vulnerabilities.pipelines.v2_improvers.enhance_with_github_poc import (
21+
GithubPocsImproverPipeline,
22+
)
23+
24+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25+
26+
TEST_REPO_DIR = os.path.join(BASE_DIR, "../../test_data/github_poc")
27+
28+
29+
@pytest.mark.django_db
@mock.patch("vulnerabilities.pipelines.v2_improvers.enhance_with_github_poc.fetch_via_vcs")
def test_github_poc_db_improver(mock_fetch_via_vcs):
    """
    Run the GitHub PoC improver against the local fixture directory and check
    that exploit records are created for the advisories matched by alias.
    """
    # Stand in for the cloned repository: point the pipeline at the fixtures.
    mock_vcs = MagicMock()
    mock_vcs.dest_dir = TEST_REPO_DIR
    mock_fetch_via_vcs.return_value = mock_vcs

    # (advisory_id, unique_content_id, alias) for each advisory under test.
    advisory_fixtures = (
        ("VCIO-123-0001", "sgsdg45", "CVE-2022-0236"),
        ("VCIO-123-1002", "6hd4d6f", "CVE-2025-0108"),
        ("VCIO-123-1003", "sd6h4sh", "CVE-2025-0309"),
    )
    for advisory_id, unique_content_id, alias in advisory_fixtures:
        advisory = AdvisoryV2.objects.create(
            advisory_id=advisory_id,
            datasource_id="ds",
            avid=f"ds/{advisory_id}",
            unique_content_id=unique_content_id,
            url="https://test.com",
            date_collected=datetime.now(),
        )
        advisory.aliases.add(AdvisoryAlias.objects.create(alias=alias))

    improver = GithubPocsImproverPipeline()
    improver.execute()

    assert AdvisoryExploit.objects.count() == 10
    assert not AdvisoryExploit.objects.exclude(data_source="GitHub-PoC").exists()
    # Look the record up by URL instead of relying on `first()`: the creation
    # order follows the directory traversal order, which is filesystem-dependent.
    assert AdvisoryExploit.objects.filter(
        data_source="GitHub-PoC",
        source_url="https://github.com/iSee857/CVE-2025-0108-PoC",
    ).exists()
    # The clone must be removed once the pipeline has run.
    mock_vcs.delete.assert_called_once()
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
[
2+
{
3+
"id": 448514056,
4+
"name": "CVE-2022-0236",
5+
"full_name": "qurbat\/CVE-2022-0236",
6+
"owner": {
7+
"login": "qurbat",
8+
"id": 37518297,
9+
"avatar_url": "https:\/\/avatars.githubusercontent.com\/u\/37518297?v=4",
10+
"html_url": "https:\/\/github.com\/qurbat",
11+
"user_view_type": "public"
12+
},
13+
"html_url": "https:\/\/github.com\/qurbat\/CVE-2022-0236",
14+
"description": "Proof of concept for unauthenticated sensitive data disclosure affecting the wp-import-export WordPress plugin (CVE-2022-0236)",
15+
"fork": false,
16+
"created_at": "2022-01-16T09:52:28Z",
17+
"updated_at": "2023-01-28T03:56:57Z",
18+
"pushed_at": "2022-01-18T17:14:53Z",
19+
"stargazers_count": 3,
20+
"watchers_count": 3,
21+
"has_discussions": false,
22+
"forks_count": 3,
23+
"allow_forking": true,
24+
"is_template": false,
25+
"web_commit_signoff_required": false,
26+
"topics": [
27+
"wordpress-security"
28+
],
29+
"visibility": "public",
30+
"forks": 3,
31+
"watchers": 3,
32+
"score": 0,
33+
"subscribers_count": 1
34+
},
35+
{
36+
"id": 448893968,
37+
"name": "CVE-2022-0236",
38+
"full_name": "xiska62314\/CVE-2022-0236",
39+
"owner": {
40+
"login": "xiska62314",
41+
"id": 97891523,
42+
"avatar_url": "https:\/\/avatars.githubusercontent.com\/u\/97891523?v=4",
43+
"html_url": "https:\/\/github.com\/xiska62314",
44+
"user_view_type": "public"
45+
},
46+
"html_url": "https:\/\/github.com\/xiska62314\/CVE-2022-0236",
47+
"description": "CVE-2022-0236",
48+
"fork": false,
49+
"created_at": "2022-01-17T12:56:19Z",
50+
"updated_at": "2022-01-17T12:56:19Z",
51+
"pushed_at": "2022-01-17T12:56:20Z",
52+
"stargazers_count": 0,
53+
"watchers_count": 0,
54+
"has_discussions": false,
55+
"forks_count": 0,
56+
"allow_forking": true,
57+
"is_template": false,
58+
"web_commit_signoff_required": false,
59+
"topics": [],
60+
"visibility": "public",
61+
"forks": 0,
62+
"watchers": 0,
63+
"score": 0,
64+
"subscribers_count": 1
65+
}
66+
]

0 commit comments

Comments
 (0)