1010import dataclasses
1111import datetime
1212import functools
13+ import hashlib
1314import logging
1415import traceback
1516import xml .etree .ElementTree as ET
2728from fetchcode .vcs import fetch_via_vcs
2829from license_expression import Licensing
2930from packageurl import PackageURL
31+ from packageurl .contrib .purl2url import get_repo_url
32+ from packageurl .contrib .url2purl import url2purl
3033from univers .version_range import RANGE_CLASS_BY_SCHEMES
3134from univers .version_range import VersionRange
3235from univers .versions import Version
3740from vulnerabilities .severity_systems import ScoringSystem
3841from vulnerabilities .utils import classproperty
3942from vulnerabilities .utils import get_reference_id
43+ from vulnerabilities .utils import is_commit
4044from vulnerabilities .utils import is_cve
4145from vulnerabilities .utils import nearest_patched_package
4246from vulnerabilities .utils import purl_to_dict
@@ -194,6 +198,101 @@ def from_url(cls, url):
194198 return cls (url = url )
195199
196200
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class PackageCommitPatchData:
    """
    Describe a commit in a version control repository, with an optional patch text.
    ``vcs_url`` and ``commit_hash`` are required; ``patch_text`` is optional.
    """

    vcs_url: str
    commit_hash: str
    patch_text: Optional[str] = None

    def __post_init__(self):
        """
        Raise a ValueError if ``commit_hash`` is empty or rejected by ``is_commit``,
        or if ``vcs_url`` is empty.
        """
        if not self.commit_hash:
            raise ValueError("Commit must have a non-empty commit_hash.")

        if not is_commit(self.commit_hash):
            raise ValueError(f"Commit must have a valid commit_hash: {self.commit_hash}.")

        if not self.vcs_url:
            raise ValueError("Commit must have a non-empty vcs_url.")

    def __lt__(self, other):
        """Order by ``_cmp_key()``; return NotImplemented for other types."""
        if not isinstance(other, PackageCommitPatchData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    # TODO: Add cache
    def _cmp_key(self):
        """Return the tuple used to order instances."""
        return (
            self.vcs_url,
            self.commit_hash,
            # ``patch_text`` may be None and None cannot be compared with a str:
            # the leading flag sorts a missing patch_text before any text
            # (including the empty string) without raising a TypeError.
            self.patch_text is not None,
            self.patch_text or "",
        )

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the commit."""
        return {
            "vcs_url": self.vcs_url,
            "commit_hash": self.commit_hash,
            "patch_text": self.patch_text,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a PackageCommitPatchData instance from a dictionary.
        Missing keys are read as None and then validated in ``__post_init__``.
        """
        return cls(
            vcs_url=data.get("vcs_url"),
            commit_hash=data.get("commit_hash"),
            patch_text=data.get("patch_text"),
        )
247+
248+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class PatchData:
    """
    Describe a patch by its URL, its text, or its VCS location.
    At least one of ``patch_url``, ``patch_text`` or ``vcs_url`` is required.
    """

    patch_url: Optional[str] = None
    patch_text: Optional[str] = None
    vcs_url: Optional[str] = None
    commit_hash: Optional[str] = None
    patch_checksum: Optional[str] = None

    def __post_init__(self):
        """Raise a ValueError if patch_url, patch_text and vcs_url are all empty."""
        if not self.vcs_url and not self.patch_text and not self.patch_url:
            raise ValueError("A patch must include patch_url, patch_text, or vcs_url")

    def __lt__(self, other):
        """Order by ``_cmp_key()``; return NotImplemented for other types."""
        if not isinstance(other, PatchData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    def _cmp_key(self):
        """
        Return the tuple used to order instances.

        Every field is optional, and None cannot be compared with a str, so each
        value is mapped to a ``(is_set, text)`` pair: an unset field sorts before
        any set one. ``patch_url`` comes last as a tie-breaker so that patches
        differing only by URL still have a defined order.
        """
        values = (
            self.vcs_url,
            self.commit_hash,
            self.patch_text,
            self.patch_checksum,
            self.patch_url,
        )
        return tuple((value is not None, value or "") for value in values)

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the patch."""
        return {
            "patch_url": self.patch_url,
            "vcs_url": self.vcs_url,
            "commit_hash": self.commit_hash,
            "patch_text": self.patch_text,
            "patch_checksum": self.patch_checksum,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a PatchData instance from a dictionary.
        Missing keys are read as None.
        """
        return cls(
            patch_url=data.get("patch_url"),
            vcs_url=data.get("vcs_url"),
            commit_hash=data.get("commit_hash"),
            patch_text=data.get("patch_text"),
            # Keep the checksum so that to_dict()/from_dict() round-trips.
            patch_checksum=data.get("patch_checksum"),
        )
294+
295+
197296class UnMergeablePackageError (Exception ):
198297 """
199298 Raised when a package cannot be merged with another one.
@@ -344,21 +443,30 @@ class AffectedPackageV2:
344443 """
345444 Relate a Package URL with a range of affected versions and fixed versions.
346445 The Package URL must *not* have a version.
347- AffectedPackage must contain either ``affected_version_range`` or ``fixed_version_range``.
446+ AffectedPackage must contain at least one of ``affected_version_range``, ``fixed_version_range``, ``introduced_by_commit_patches`` or ``fixed_by_commit_patches``.
348447 """
349448
350449 package : PackageURL
351450 affected_version_range : Optional [VersionRange ] = None
352451 fixed_version_range : Optional [VersionRange ] = None
452+ introduced_by_commit_patches : List [PackageCommitPatchData ] = dataclasses .field (
453+ default_factory = list
454+ )
455+ fixed_by_commit_patches : List [PackageCommitPatchData ] = dataclasses .field (default_factory = list )
353456
354457 def __post_init__ (self ):
355458 if self .package .version :
356459 raise ValueError (f"Affected Package URL { self .package !r} cannot have a version." )
357460
358- if not (self .affected_version_range or self .fixed_version_range ):
461+ if not (
462+ self .affected_version_range
463+ or self .fixed_version_range
464+ or self .introduced_by_commit_patches
465+ or self .fixed_by_commit_patches
466+ ):
359467 raise ValueError (
360- f"Affected Package { self .package !r} should have either fixed version range or an "
361- "affected version range."
468+ f"Affected package { self .package !r} must have either a fixed version range, "
469+ "an affected version range, introduced commit patches, or fixed commit patches ."
362470 )
363471
364472 def __lt__ (self , other ):
@@ -372,6 +480,8 @@ def _cmp_key(self):
372480 str (self .package ),
373481 str (self .affected_version_range or "" ),
374482 str (self .fixed_version_range or "" ),
483+ str (self .introduced_by_commit_patches or []),
484+ str (self .fixed_by_commit_patches or []),
375485 )
376486
377487 def to_dict (self ):
@@ -385,6 +495,12 @@ def to_dict(self):
385495 "package" : purl_to_dict (self .package ),
386496 "affected_version_range" : affected_version_range ,
387497 "fixed_version_range" : fixed_version_range ,
498+ "introduced_by_commit_patches" : [
499+ commit .to_dict () for commit in self .introduced_by_commit_patches
500+ ],
501+ "fixed_by_commit_patches" : [
502+ commit .to_dict () for commit in self .fixed_by_commit_patches
503+ ],
388504 }
389505
390506 @classmethod
@@ -396,6 +512,10 @@ def from_dict(cls, affected_pkg: dict):
396512 fixed_version_range = None
397513 affected_range = affected_pkg ["affected_version_range" ]
398514 fixed_range = affected_pkg ["fixed_version_range" ]
515+ introduced_by_commit_patches = (
516+ affected_pkg .get ("introduced_by_package_commit_patches" ) or []
517+ )
518+ fixed_by_commit_patches = affected_pkg .get ("fixed_by_package_commit_patches" ) or []
399519
400520 try :
401521 affected_version_range = VersionRange .from_string (affected_range )
@@ -417,6 +537,12 @@ def from_dict(cls, affected_pkg: dict):
417537 package = package ,
418538 affected_version_range = affected_version_range ,
419539 fixed_version_range = fixed_version_range ,
540+ introduced_by_commit_patches = [
541+ PackageCommitPatchData .from_dict (commit ) for commit in introduced_by_commit_patches
542+ ],
543+ fixed_by_commit_patches = [
544+ PackageCommitPatchData .from_dict (commit ) for commit in fixed_by_commit_patches
545+ ],
420546 )
421547
422548
@@ -441,6 +567,7 @@ class AdvisoryData:
441567 )
442568 references : List [Reference ] = dataclasses .field (default_factory = list )
443569 references_v2 : List [ReferenceV2 ] = dataclasses .field (default_factory = list )
570+ patches : List [PatchData ] = dataclasses .field (default_factory = list )
444571 date_published : Optional [datetime .datetime ] = None
445572 weaknesses : List [int ] = dataclasses .field (default_factory = list )
446573 severities : List [VulnerabilitySeverity ] = dataclasses .field (default_factory = list )
@@ -473,6 +600,7 @@ def to_dict(self):
473600 "summary" : self .summary ,
474601 "affected_packages" : [pkg .to_dict () for pkg in self .affected_packages ],
475602 "references_v2" : [ref .to_dict () for ref in self .references_v2 ],
603+ "patches" : [patch .to_dict () for patch in self .patches ],
476604 "severities" : [sev .to_dict () for sev in self .severities ],
477605 "date_published" : self .date_published .isoformat () if self .date_published else None ,
478606 "weaknesses" : self .weaknesses ,
@@ -533,6 +661,7 @@ class AdvisoryDataV2:
533661 summary : Optional [str ] = ""
534662 affected_packages : List [AffectedPackage ] = dataclasses .field (default_factory = list )
535663 references : List [ReferenceV2 ] = dataclasses .field (default_factory = list )
664+ patches : List [PatchData ] = dataclasses .field (default_factory = list )
536665 date_published : Optional [datetime .datetime ] = None
537666 weaknesses : List [int ] = dataclasses .field (default_factory = list )
538667 url : Optional [str ] = None
@@ -557,6 +686,7 @@ def to_dict(self):
557686 "summary" : self .summary ,
558687 "affected_packages" : [pkg .to_dict () for pkg in self .affected_packages ],
559688 "references" : [ref .to_dict () for ref in self .references ],
689+ "patches" : [ref .to_dict () for ref in self .patches ],
560690 "date_published" : self .date_published .isoformat () if self .date_published else None ,
561691 "weaknesses" : self .weaknesses ,
562692 "url" : self .url if self .url else "" ,
@@ -574,6 +704,7 @@ def from_dict(cls, advisory_data):
574704 if pkg is not None
575705 ],
576706 "references" : [Reference .from_dict (ref ) for ref in advisory_data ["references" ]],
707+ "patches" : [PatchData .from_dict (ref ) for ref in advisory_data ["patches" ]],
577708 "date_published" : datetime .datetime .fromisoformat (date_published )
578709 if date_published
579710 else None ,
0 commit comments