Skip to content

Commit dcb0511

Browse files
authored
Merge pull request #1971 from aboutcode-org/redhat-v2
Add v2 pipeline for importing Red Hat advisories
2 parents abf81d5 + 737d94a commit dcb0511

File tree

16 files changed

+10272
-3
lines changed

16 files changed

+10272
-3
lines changed

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ install_requires =
101101
#vulntotal
102102
python-dotenv
103103
texttable
104+
extractcode[full]==31.0.0
104105

105106

106107
[options.extras_require]

vulnerabilities/importer.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ def from_url(cls, url):
187187
reference_id = get_reference_id(url)
188188
if "GHSA-" in reference_id.upper():
189189
return cls(reference_id=reference_id, url=url)
190+
if reference_id.startswith(("RHSA-", "RHEA-", "RHBA-")):
191+
return cls(reference_id=reference_id, url=url)
190192
if is_cve(reference_id):
191193
return cls(url=url, reference_id=reference_id.upper())
192194
return cls(url=url)
@@ -458,6 +460,24 @@ def clean_summary(self, summary):
458460
return summary
459461

460462
def to_dict(self):
463+
is_adv_v2 = (
464+
self.advisory_id
465+
or self.severities
466+
or self.references_v2
467+
or (self.affected_packages and isinstance(self.affected_packages[0], AffectedPackageV2))
468+
)
469+
if is_adv_v2:
470+
return {
471+
"advisory_id": self.advisory_id,
472+
"aliases": self.aliases,
473+
"summary": self.summary,
474+
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
475+
"references_v2": [ref.to_dict() for ref in self.references_v2],
476+
"severities": [sev.to_dict() for sev in self.severities],
477+
"date_published": self.date_published.isoformat() if self.date_published else None,
478+
"weaknesses": self.weaknesses,
479+
"url": self.url if self.url else "",
480+
}
461481
return {
462482
"aliases": self.aliases,
463483
"summary": self.summary,

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
from vulnerabilities.pipelines.v2_importers import postgresql_importer as postgresql_importer_v2
5858
from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
5959
from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2
60+
from vulnerabilities.pipelines.v2_importers import redhat_importer as redhat_importer_v2
6061
from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2
6162
from vulnerabilities.pipelines.v2_importers import xen_importer as xen_importer_v2
6263
from vulnerabilities.utils import create_registry
@@ -79,6 +80,7 @@
7980
postgresql_importer_v2.PostgreSQLImporterPipeline,
8081
mozilla_importer_v2.MozillaImporterPipeline,
8182
github_osv_importer_v2.GithubOSVImporterPipeline,
83+
redhat_importer_v2.RedHatImporterPipeline,
8284
nvd_importer.NVDImporterPipeline,
8385
github_importer.GitHubAPIImporterPipeline,
8486
gitlab_importer.GitLabImporterPipeline,

vulnerabilities/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2890,6 +2890,7 @@ def to_advisory_data(self) -> "AdvisoryData":
28902890
from vulnerabilities.importer import AdvisoryData
28912891

28922892
return AdvisoryData(
2893+
advisory_id=self.advisory_id,
28932894
aliases=[item.alias for item in self.aliases.all()],
28942895
summary=self.summary,
28952896
affected_packages=[

vulnerabilities/pipelines/v2_importers/archlinux_importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import json
1011
from typing import Iterable
1112
from typing import Mapping
1213

@@ -97,4 +98,5 @@ def parse_advisory(self, record) -> AdvisoryData:
9798
affected_packages=affected_packages,
9899
weaknesses=[],
99100
url=f"https://security.archlinux.org/{avg_name}.json",
101+
original_advisory_text=json.dumps(record),
100102
)
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import logging
12+
import shutil
13+
import tempfile
14+
from io import DEFAULT_BUFFER_SIZE
15+
from pathlib import Path
16+
from typing import Iterable
17+
from urllib.parse import urljoin
18+
19+
import dateparser
20+
import requests
21+
from extractcode import ExtractError
22+
from packageurl import PackageURL
23+
from univers.version_range import RpmVersionRange
24+
from univers.version_range import VersionRange
25+
26+
from vulnerabilities.importer import AdvisoryData
27+
from vulnerabilities.importer import AffectedPackageV2
28+
from vulnerabilities.importer import ReferenceV2
29+
from vulnerabilities.importer import VulnerabilitySeverity
30+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
31+
from vulnerabilities.pipes import extractcode_utils
32+
from vulnerabilities.severity_systems import REDHAT_AGGREGATE
33+
from vulnerabilities.utils import load_json
34+
from vulntotal import vulntotal_utils
35+
36+
37+
class RedHatImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """Import RedHat Advisories (RHSA, RHEA and RHBA).

    Ingest CSAF advisories published by RedHat, including Red Hat Security Advisory (RHSA),
    Red Hat Enhancement Advisory (RHEA), and Red Hat Bug Fix Advisory (RHBA).
    """

    pipeline_id = "redhat_importer_v2"
    spdx_license_expression = "CC-BY-4.0"
    license_url = "https://access.redhat.com/security/data/"
    url = "https://security.access.redhat.com/data/csaf/v2/advisories/"

    # Seconds to wait on the remote server. ``requests`` never times out unless
    # told to, so a stalled connection would otherwise hang the pipeline.
    request_timeout = 60

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps: download, import, then clean up."""
        return (
            cls.fetch,
            cls.collect_and_store_advisories,
            cls.clean_download,
        )

    def fetch(self):
        """Download the latest advisory archive and extract it in a temporary directory.

        The archive file name is read from ``archive_latest.txt`` under ``self.url``.
        Sets ``self.latest_archive_name``, ``self.location`` and ``self.cleanup_location``.

        Raises:
            requests.HTTPError: when either download returns an error status.
            ExtractError: when the archive extraction reports errors.
        """
        archive_latest_url = urljoin(self.url, "archive_latest.txt")
        response = requests.get(archive_latest_url, timeout=self.request_timeout)
        response.raise_for_status()
        self.latest_archive_name = response.text.strip()

        self.location = self.cleanup_location = Path(tempfile.mkdtemp())
        archive_path = self.location / self.latest_archive_name
        archive_url = urljoin(self.url, self.latest_archive_name)

        response = requests.get(archive_url, stream=True, timeout=self.request_timeout)
        try:
            response.raise_for_status()
            with open(archive_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=DEFAULT_BUFFER_SIZE):
                    f.write(chunk)
        finally:
            # A streamed response keeps its connection open until closed.
            response.close()

        if errors := extractcode_utils.extract_archive(
            source=archive_path,
            destination=self.location,
        ):
            self.log(
                f"Error while extracting archive {archive_path}: {errors}",
                level=logging.ERROR,
            )
            raise ExtractError(errors)

    def advisories_count(self) -> int:
        """Return the number of ``*.json`` files found recursively under ``self.location``."""
        return sum(1 for _ in self.location.rglob("*.json"))

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield an AdvisoryData for each ``*.json`` file under ``self.location``.

        Files that ``parse_advisory`` rejects (it returns None for an unsupported
        CSAF version) are skipped so that only AdvisoryData objects are yielded.
        """
        for record in self.location.rglob("*.json"):
            advisory_data = self.parse_advisory(record)
            if advisory_data is not None:
                yield advisory_data

    def parse_advisory(self, record):
        """Return an AdvisoryData built from the CSAF JSON file at ``record``.

        Return None, after logging an error, when the document declares a
        ``csaf_version`` other than "2.0".

        ``record`` is a path object; its parent directory name and file name are
        used to build the advisory URL under ``self.url``.
        """
        advisory = load_json(record)
        document = advisory.get("document", {})
        if (csaf_version := document.get("csaf_version")) and csaf_version != "2.0":
            self.log(f"Unsupported CSAF version: {csaf_version}.", level=logging.ERROR)
            return

        severities = []
        references = []
        affected_packages = []
        notes = document.get("notes", [])
        adv_sub_path = f"{record.parent.name}/{record.name}"
        url = urljoin(self.url, adv_sub_path)
        advisory_id = get_item(document, "tracking", "id")
        release_date = get_item(document, "tracking", "initial_release_date")

        # Every note except the legal disclaimer contributes to the summary.
        # Notes without a "text" entry are ignored instead of raising KeyError.
        summary = "\n\n".join(
            note["text"]
            for note in notes
            if "text" in note and note.get("category") != "legal_disclaimer"
        )

        # Vulnerability entries that carry no CVE id are left out of the aliases.
        aliases = [vul["cve"] for vul in advisory.get("vulnerabilities", []) if vul.get("cve")]

        for ref in document.get("references", []):
            ref_url = ref.get("url")
            if not ref_url:
                # Nothing to link to; also avoids calling startswith() on None.
                continue
            if ref_url.startswith("https://bugzilla.redhat.com/"):
                references.append(
                    ReferenceV2(
                        reference_id=ref.get("summary"),
                        reference_type="bug",
                        url=ref_url,
                    )
                )
                continue
            references.append(ReferenceV2.from_url(url=ref_url))

        aggregate_severity = document.get("aggregate_severity") or {}
        if severity_text := aggregate_severity.get("text"):
            severities.append(
                VulnerabilitySeverity(
                    system=REDHAT_AGGREGATE,
                    value=severity_text,
                    url=url,
                )
            )

        # NOTE(review): ``or []`` guards against the lookup returning None rather
        # than the default (presumably possible when an intermediate value is
        # empty) -- confirm against vulntotal_utils.get_item.
        impacts = get_item(advisory, "product_tree", "branches", 0, "branches", default=[]) or []
        for impact in impacts:
            if impact.get("category") == "product_family":
                continue
            for branch in impact.get("branches", []):
                purl = get_item(
                    branch,
                    "product",
                    "product_identification_helper",
                    "purl",
                    default=None,
                )
                # Only RPM package URLs are imported.
                if not purl or not purl.startswith("pkg:rpm/"):
                    continue

                package_purl = PackageURL.from_string(purl=purl)
                fixed_version = package_purl.version
                if not fixed_version:
                    continue

                # The purl version is recorded as the fixed version; every version
                # below it is recorded as affected.
                fixed_version_range = RpmVersionRange.from_versions([fixed_version])
                affected_version_range = VersionRange.from_string(f"vers:rpm/<{fixed_version}")

                # Rebuild the purl without its version to get the base package.
                purl_dict = package_purl.to_dict()
                del purl_dict["version"]
                base_purl = PackageURL(**purl_dict)

                affected_packages.append(
                    AffectedPackageV2(
                        package=base_purl,
                        affected_version_range=affected_version_range,
                        fixed_version_range=fixed_version_range,
                    )
                )

        return AdvisoryData(
            advisory_id=advisory_id,
            aliases=aliases,
            summary=summary,
            references_v2=references,
            affected_packages=affected_packages,
            severities=severities,
            weaknesses=[],
            date_published=dateparser.parse(release_date) if release_date else None,
            url=url,
            original_advisory_text=json.dumps(advisory),
        )

    def clean_download(self):
        """Remove the temporary download directory created by ``fetch``, if any."""
        if hasattr(self, "cleanup_location") and self.cleanup_location.exists():
            self.log(f"Removing downloaded archive: {self.latest_archive_name}")
            shutil.rmtree(self.cleanup_location)

    def on_failure(self):
        """Clean up the temporary download directory; delegates to ``clean_download``."""
        self.clean_download()
188+
189+
190+
def get_item(entity, *attributes, default=None):
    """Return the value found by following ``attributes`` through ``entity``.

    Delegates the lookup to ``vulntotal_utils.get_item`` and returns ``default``
    when that lookup raises KeyError, IndexError or TypeError.
    """
    try:
        return vulntotal_utils.get_item(entity, *attributes)
    except (KeyError, IndexError, TypeError):
        return default
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from extractcode import api
11+
12+
13+
def extract_archive(source, destination):
    """Extract the archive at `source` into the `destination` directory.

    Return a mapping of event source (as a string) to its errors, holding one
    entry for each finished extraction event that reported errors. The mapping
    is empty when no such event was seen.
    """
    return {
        str(event.source): event.errors
        for event in api.extract_archive(source, destination)
        if event.done and event.errors
    }
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import os
12+
from pathlib import Path
13+
from unittest.mock import Mock
14+
from unittest.mock import patch
15+
16+
from django.test import TestCase
17+
18+
from vulnerabilities.models import AdvisoryV2
19+
from vulnerabilities.models import PackageV2
20+
from vulnerabilities.pipelines.v2_importers.redhat_importer import RedHatImporterPipeline
21+
from vulnerabilities.tests import util_tests
22+
23+
TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "redhat" / "csaf_2_0"
24+
25+
26+
class TestArchLinuxImporterPipeline(TestCase):
    """Run the Red Hat v2 importer pipeline against the CSAF 2.0 test fixtures.

    NOTE(review): the class name mentions ArchLinux, yet the pipeline exercised
    here is RedHatImporterPipeline -- looks like a copy-paste leftover; consider
    renaming the class.
    """

    @patch("vulnerabilities.pipelines.v2_importers.redhat_importer.RedHatImporterPipeline.fetch")
    def test_redhat_advisories_v2(self, mock_fetch):
        """Import the fixture advisories and compare them with the expected JSON."""
        # The download step is patched out; the mock is given the step's name.
        # Presumably the pipeline runner reads each step's __name__ -- TODO confirm.
        mock_fetch.__name__ = "fetch"

        # Point the pipeline at the local fixtures in place of a downloaded archive.
        importer = RedHatImporterPipeline()
        importer.location = TEST_DATA
        importer.execute()

        self.assertEqual(6, AdvisoryV2.objects.count())
        self.assertEqual(93, PackageV2.objects.count())

        result = [
            stored_advisory.to_advisory_data().to_dict()
            for stored_advisory in AdvisoryV2.objects.all()
        ]
        expected_file = TEST_DATA.parent / "redhat_advisoryv2-expected.json"
        util_tests.check_results_against_json(result, expected_file)

0 commit comments

Comments
 (0)