Skip to content

Commit b8ea2ad

Browse files
committed
Update the model to have Patch and PackageCommitPatch
Signed-off-by: ziad hany <[email protected]>
1 parent 98e5160 commit b8ea2ad

28 files changed

+1347
-157
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ MarkupSafe==2.1.1
6464
matplotlib-inline==0.1.3
6565
multidict==6.0.2
6666
mypy-extensions==0.4.3
67-
packageurl-python==0.15.6
67+
packageurl-python==0.17.6
6868
packaging==21.3
6969
paramiko==3.4.0
7070
parso==0.8.3

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ install_requires =
7171
drf-spectacular[sidecar]>=0.24.2
7272

7373
#essentials
74-
packageurl-python>=0.15
74+
packageurl-python>=0.17
7575
univers>=30.12.0
7676
license-expression>=30.0.0
7777

vulnerabilities/importer.py

Lines changed: 135 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import dataclasses
1111
import datetime
1212
import functools
13+
import hashlib
1314
import logging
1415
import traceback
1516
import xml.etree.ElementTree as ET
@@ -27,6 +28,8 @@
2728
from fetchcode.vcs import fetch_via_vcs
2829
from license_expression import Licensing
2930
from packageurl import PackageURL
31+
from packageurl.contrib.purl2url import get_repo_url
32+
from packageurl.contrib.url2purl import url2purl
3033
from univers.version_range import RANGE_CLASS_BY_SCHEMES
3134
from univers.version_range import VersionRange
3235
from univers.versions import Version
@@ -37,6 +40,7 @@
3740
from vulnerabilities.severity_systems import ScoringSystem
3841
from vulnerabilities.utils import classproperty
3942
from vulnerabilities.utils import get_reference_id
43+
from vulnerabilities.utils import is_commit
4044
from vulnerabilities.utils import is_cve
4145
from vulnerabilities.utils import nearest_patched_package
4246
from vulnerabilities.utils import purl_to_dict
@@ -194,6 +198,101 @@ def from_url(cls, url):
194198
return cls(url=url)
195199

196200

201+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class PackageCommitPatchData:
    """
    A commit in a version control repository, with optional patch text.

    ``vcs_url`` and ``commit_hash`` are mandatory; ``commit_hash`` must also
    be accepted by ``is_commit``. ``patch_text`` may be left as None.

    Raises ValueError from ``__post_init__`` when ``commit_hash`` or
    ``vcs_url`` is empty or when ``commit_hash`` is rejected by ``is_commit``.
    """

    vcs_url: str
    commit_hash: str
    patch_text: Optional[str] = None

    def __post_init__(self):
        if not self.commit_hash:
            raise ValueError("Commit must have a non-empty commit_hash.")

        if not is_commit(self.commit_hash):
            raise ValueError(f"Commit must have a valid commit_hash: {self.commit_hash}.")

        if not self.vcs_url:
            raise ValueError("Commit must have a non-empty vcs_url.")

    def __lt__(self, other):
        if not isinstance(other, PackageCommitPatchData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    # TODO: Add cache
    def _cmp_key(self):
        """
        Return a tuple used to order instances.

        ``patch_text`` is optional, so it is encoded as an
        ``(is_set, text)`` pair: comparing a bare None with a str would
        raise TypeError. A missing patch text sorts before any present one.
        """
        return (
            self.vcs_url,
            self.commit_hash,
            (self.patch_text is not None, self.patch_text or ""),
        )

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the commit."""
        return {
            "vcs_url": self.vcs_url,
            "commit_hash": self.commit_hash,
            "patch_text": self.patch_text,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a PackageCommitPatchData instance from a dictionary.

        Missing keys are read as None, so a mapping without ``vcs_url`` or
        ``commit_hash`` raises ValueError through ``__post_init__``.
        """
        return cls(
            vcs_url=data.get("vcs_url"),
            commit_hash=data.get("commit_hash"),
            patch_text=data.get("patch_text"),
        )
247+
248+
249+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class PatchData:
    """
    A patch identified by its URL, its text, or the VCS repository holding it.

    Every field is optional, but at least one of ``patch_url``,
    ``patch_text`` or ``vcs_url`` must be set; otherwise ``__post_init__``
    raises ValueError.
    """

    patch_url: Optional[str] = None
    patch_text: Optional[str] = None
    vcs_url: Optional[str] = None
    commit_hash: Optional[str] = None
    patch_checksum: Optional[str] = None

    def __post_init__(self):
        if not (self.vcs_url or self.patch_text or self.patch_url):
            raise ValueError("A patch must include patch_url, patch_text, or vcs_url")

    def __lt__(self, other):
        if not isinstance(other, PatchData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    def _cmp_key(self):
        """
        Return a tuple used to order instances.

        Each field is encoded as an ``(is_set, value)`` pair because every
        field may be None and comparing None with a str raises TypeError.
        Unset fields sort before set ones. ``patch_url`` comes last so the
        relative order given by the other fields is unchanged.
        """
        values = (
            self.vcs_url,
            self.commit_hash,
            self.patch_text,
            self.patch_checksum,
            self.patch_url,
        )
        return tuple((value is not None, value or "") for value in values)

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the patch."""
        return {
            "patch_url": self.patch_url,
            "vcs_url": self.vcs_url,
            "commit_hash": self.commit_hash,
            "patch_text": self.patch_text,
            "patch_checksum": self.patch_checksum,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a PatchData instance from a dictionary.

        Reads every key that ``to_dict`` writes, so a round trip is lossless.
        Missing keys are read as None.
        """
        return cls(
            patch_url=data.get("patch_url"),
            vcs_url=data.get("vcs_url"),
            commit_hash=data.get("commit_hash"),
            patch_text=data.get("patch_text"),
            patch_checksum=data.get("patch_checksum"),
        )
294+
295+
197296
class UnMergeablePackageError(Exception):
198297
"""
199298
Raised when a package cannot be merged with another one.
@@ -344,21 +443,30 @@ class AffectedPackageV2:
344443
"""
345444
Relate a Package URL with a range of affected versions and fixed versions.
346445
The Package URL must *not* have a version.
347-
AffectedPackage must contain either ``affected_version_range`` or ``fixed_version_range``.
446+
AffectedPackage must contain at least one of ``affected_version_range``, ``fixed_version_range``, ``introduced_by_commit_patches`` or ``fixed_by_commit_patches``.
348447
"""
349448

350449
package: PackageURL
351450
affected_version_range: Optional[VersionRange] = None
352451
fixed_version_range: Optional[VersionRange] = None
452+
introduced_by_commit_patches: List[PackageCommitPatchData] = dataclasses.field(
453+
default_factory=list
454+
)
455+
fixed_by_commit_patches: List[PackageCommitPatchData] = dataclasses.field(default_factory=list)
353456

354457
def __post_init__(self):
355458
if self.package.version:
356459
raise ValueError(f"Affected Package URL {self.package!r} cannot have a version.")
357460

358-
if not (self.affected_version_range or self.fixed_version_range):
461+
if not (
462+
self.affected_version_range
463+
or self.fixed_version_range
464+
or self.introduced_by_commit_patches
465+
or self.fixed_by_commit_patches
466+
):
359467
raise ValueError(
360-
f"Affected Package {self.package!r} should have either fixed version range or an "
361-
"affected version range."
468+
f"Affected package {self.package!r} must have either a fixed version range, "
469+
"an affected version range, introduced commit patches, or fixed commit patches."
362470
)
363471

364472
def __lt__(self, other):
@@ -372,6 +480,8 @@ def _cmp_key(self):
372480
str(self.package),
373481
str(self.affected_version_range or ""),
374482
str(self.fixed_version_range or ""),
483+
str(self.introduced_by_commit_patches or []),
484+
str(self.fixed_by_commit_patches or []),
375485
)
376486

377487
def to_dict(self):
@@ -385,6 +495,12 @@ def to_dict(self):
385495
"package": purl_to_dict(self.package),
386496
"affected_version_range": affected_version_range,
387497
"fixed_version_range": fixed_version_range,
498+
"introduced_by_commit_patches": [
499+
commit.to_dict() for commit in self.introduced_by_commit_patches
500+
],
501+
"fixed_by_commit_patches": [
502+
commit.to_dict() for commit in self.fixed_by_commit_patches
503+
],
388504
}
389505

390506
@classmethod
@@ -396,6 +512,10 @@ def from_dict(cls, affected_pkg: dict):
396512
fixed_version_range = None
397513
affected_range = affected_pkg["affected_version_range"]
398514
fixed_range = affected_pkg["fixed_version_range"]
515+
# Read the keys that ``to_dict`` writes so a round trip keeps the commit
# patches. The older "*_package_commit_patches" spellings are still accepted
# as a fallback for data serialized under those names.
introduced_by_commit_patches = (
    affected_pkg.get("introduced_by_commit_patches")
    or affected_pkg.get("introduced_by_package_commit_patches")
    or []
)
fixed_by_commit_patches = (
    affected_pkg.get("fixed_by_commit_patches")
    or affected_pkg.get("fixed_by_package_commit_patches")
    or []
)
399519

400520
try:
401521
affected_version_range = VersionRange.from_string(affected_range)
@@ -417,6 +537,12 @@ def from_dict(cls, affected_pkg: dict):
417537
package=package,
418538
affected_version_range=affected_version_range,
419539
fixed_version_range=fixed_version_range,
540+
introduced_by_commit_patches=[
541+
PackageCommitPatchData.from_dict(commit) for commit in introduced_by_commit_patches
542+
],
543+
fixed_by_commit_patches=[
544+
PackageCommitPatchData.from_dict(commit) for commit in fixed_by_commit_patches
545+
],
420546
)
421547

422548

@@ -441,6 +567,7 @@ class AdvisoryData:
441567
)
442568
references: List[Reference] = dataclasses.field(default_factory=list)
443569
references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list)
570+
patches: List[PatchData] = dataclasses.field(default_factory=list)
444571
date_published: Optional[datetime.datetime] = None
445572
weaknesses: List[int] = dataclasses.field(default_factory=list)
446573
severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)
@@ -473,6 +600,7 @@ def to_dict(self):
473600
"summary": self.summary,
474601
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
475602
"references_v2": [ref.to_dict() for ref in self.references_v2],
603+
"patches": [patch.to_dict() for patch in self.patches],
476604
"severities": [sev.to_dict() for sev in self.severities],
477605
"date_published": self.date_published.isoformat() if self.date_published else None,
478606
"weaknesses": self.weaknesses,
@@ -533,6 +661,7 @@ class AdvisoryDataV2:
533661
summary: Optional[str] = ""
534662
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
535663
references: List[ReferenceV2] = dataclasses.field(default_factory=list)
664+
patches: List[PatchData] = dataclasses.field(default_factory=list)
536665
date_published: Optional[datetime.datetime] = None
537666
weaknesses: List[int] = dataclasses.field(default_factory=list)
538667
url: Optional[str] = None
@@ -557,6 +686,7 @@ def to_dict(self):
557686
"summary": self.summary,
558687
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
559688
"references": [ref.to_dict() for ref in self.references],
689+
"patches": [ref.to_dict() for ref in self.patches],
560690
"date_published": self.date_published.isoformat() if self.date_published else None,
561691
"weaknesses": self.weaknesses,
562692
"url": self.url if self.url else "",
@@ -574,6 +704,7 @@ def from_dict(cls, advisory_data):
574704
if pkg is not None
575705
],
576706
"references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
707+
"patches": [PatchData.from_dict(ref) for ref in advisory_data["patches"]],
577708
"date_published": datetime.datetime.fromisoformat(date_published)
578709
if date_published
579710
else None,

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from vulnerabilities.pipelines import nvd_importer
4242
from vulnerabilities.pipelines import pypa_importer
4343
from vulnerabilities.pipelines import pysec_importer
44+
from vulnerabilities.pipelines.v2_importers import aosp_importer as aosp_importer_v2
4445
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
4546
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
4647
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
@@ -81,6 +82,7 @@
8182
mozilla_importer_v2.MozillaImporterPipeline,
8283
github_osv_importer_v2.GithubOSVImporterPipeline,
8384
redhat_importer_v2.RedHatImporterPipeline,
85+
aosp_importer_v2.AospImporterPipeline,
8486
nvd_importer.NVDImporterPipeline,
8587
github_importer.GitHubAPIImporterPipeline,
8688
gitlab_importer.GitLabImporterPipeline,

vulnerabilities/importers/curl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
9797
... ]
9898
... }
9999
>>> parse_advisory_data(raw_data)
100-
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
100+
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], patches=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
101101
"""
102102

103103
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Generated by Django 4.2.22 on 2025-11-18 20:45
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
# Schema migration for the "codecommit" model: changes its uniqueness
# constraint, adds a field for patch content and drops three commit
# metadata fields.
7+
8+
# Must be applied after migration 0103 of the "vulnerabilities" app.
dependencies = [
9+
("vulnerabilities", "0103_codecommit_impactedpackage_affecting_commits_and_more"),
10+
]
11+
12+
operations = [
13+
# Uniqueness is now the (commit_hash, vcs_url, commit_rank) combination.
migrations.AlterUniqueTogether(
14+
name="codecommit",
15+
unique_together={("commit_hash", "vcs_url", "commit_rank")},
16+
),
17+
# New optional text column (blank and NULL allowed) for the patch content.
migrations.AddField(
18+
model_name="codecommit",
19+
name="commit_patch",
20+
field=models.TextField(blank=True, help_text="patch content of the commit.", null=True),
21+
),
22+
# The three removals below drop commit_author, commit_date and
# commit_message; no data migration for their values is part of this file.
migrations.RemoveField(
23+
model_name="codecommit",
24+
name="commit_author",
25+
),
26+
migrations.RemoveField(
27+
model_name="codecommit",
28+
name="commit_date",
29+
),
30+
migrations.RemoveField(
31+
model_name="codecommit",
32+
name="commit_message",
33+
),
34+
]
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Generated by Django 4.2.22 on 2025-11-18 20:46
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
# Schema migration that renames the CodeCommit model to CodePatch and
# replaces the "affecting_commits" relation on "impactedpackage" with
# "introduced_by_commits".
7+
8+
# Must be applied after migration 0104 of the "vulnerabilities" app.
dependencies = [
9+
("vulnerabilities", "0104_alter_codecommit_unique_together_and_more"),
10+
]
11+
12+
operations = [
13+
# Rename runs first, so the AddField below can target "codepatch".
migrations.RenameModel(
14+
old_name="CodeCommit",
15+
new_name="CodePatch",
16+
),
17+
# Drops the old relation; its existing rows are not copied by this file.
migrations.RemoveField(
18+
model_name="impactedpackage",
19+
name="affecting_commits",
20+
),
21+
# New many-to-many relation from impactedpackage to the renamed model.
migrations.AddField(
22+
model_name="impactedpackage",
23+
name="introduced_by_commits",
24+
field=models.ManyToManyField(
25+
help_text="Commits introducing this impact.",
26+
related_name="introducing_commits_in_impacts",
27+
to="vulnerabilities.codepatch",
28+
),
29+
),
30+
]

0 commit comments

Comments
 (0)