|
1 | 1 | package nvdloader
|
2 | 2 |
|
3 | 3 | import (
|
4 |
| - "encoding/json" |
5 |
| - "fmt" |
6 |
| - "net/http" |
7 |
| - "os" |
8 |
| - "path/filepath" |
9 |
| - "time" |
10 |
| - |
11 |
| - apischema "github.com/facebookincubator/nvdtools/cveapi/nvd/schema" |
12 |
| - jsonschema "github.com/facebookincubator/nvdtools/cvefeed/nvd/schema" |
13 |
| - "github.com/facebookincubator/nvdtools/wfn" |
14 |
| - log "github.com/sirupsen/logrus" |
15 |
| - "github.com/stackrox/rox/pkg/httputil/proxy" |
16 |
| - "github.com/stackrox/rox/pkg/utils" |
17 | 4 | "github.com/stackrox/scanner/pkg/env"
|
18 | 5 | "github.com/stackrox/scanner/pkg/vulndump"
|
19 | 6 | "github.com/stackrox/scanner/pkg/vulnloader"
|
20 | 7 | )
|
21 | 8 |
|
22 |
| -const urlFmt = `https://services.nvd.nist.gov/rest/json/cves/2.0?noRejected&startIndex=%d` |
23 |
| - |
24 |
| -var client = http.Client{ |
25 |
| - Timeout: 5 * time.Minute, |
26 |
| - Transport: proxy.RoundTripper(), |
27 |
| -} |
28 |
| - |
29 | 9 | func init() {
|
30 |
| - if env.LegacyNVDLoader.Enabled() { |
31 |
| - vulnloader.RegisterLoader(vulndump.NVDDirName, &legacyLoader{}) |
| 10 | + if env.NVDFeedLoader.Enabled() { |
| 11 | + vulnloader.RegisterLoader(vulndump.NVDDirName, &feedLoader{}) |
32 | 12 | } else {
|
33 |
| - vulnloader.RegisterLoader(vulndump.NVDDirName, &loader{}) |
34 |
| - } |
35 |
| -} |
36 |
| - |
37 |
| -var _ vulnloader.Loader = (*loader)(nil) |
38 |
| - |
39 |
| -type loader struct{} |
40 |
| - |
41 |
| -// DownloadFeedsToPath downloads the NVD feeds to the given path. |
42 |
| -// If this function is successful, it will fill the directory with |
43 |
| -// one json file for each year of NVD data. |
44 |
| -func (l *loader) DownloadFeedsToPath(outputDir string) error { |
45 |
| - log.Info("Downloading NVD data using NVD 2.0 API") |
46 |
| - |
47 |
| - // Fetch NVD enrichment data from curated repos |
48 |
| - enrichments, err := Fetch() |
49 |
| - if err != nil { |
50 |
| - return fmt.Errorf("could not fetch NVD enrichment sources: %w", err) |
51 |
| - } |
52 |
| - |
53 |
| - nvdDir := filepath.Join(outputDir, vulndump.NVDDirName) |
54 |
| - if err := os.MkdirAll(nvdDir, 0755); err != nil { |
55 |
| - return fmt.Errorf("creating subdir for %s: %w", vulndump.NVDDirName, err) |
| 13 | + vulnloader.RegisterLoader(vulndump.NVDDirName, &apiLoader{}) |
56 | 14 | }
|
57 |
| - |
58 |
| - var fileNo, totalVulns int |
59 |
| - |
60 |
| - // Explicitly set startIdx to parallel how this is all done within the loop below. |
61 |
| - startIdx := 0 |
62 |
| - apiResp, err := query(fmt.Sprintf(urlFmt, startIdx)) |
63 |
| - if err != nil { |
64 |
| - return err |
65 |
| - } |
66 |
| - var i int |
67 |
| - // Buffer to store vulns until they are written to a file. |
68 |
| - cveItems := make([]*jsonschema.NVDCVEFeedJSON10DefCVEItem, 0, 20_000) |
69 |
| - for apiResp.ResultsPerPage != 0 { |
70 |
| - vulns, err := toJSON(apiResp.Vulnerabilities) |
71 |
| - if err != nil { |
72 |
| - return fmt.Errorf("failed to convert API vulns to JSON: %w", err) |
73 |
| - } |
74 |
| - |
75 |
| - if len(vulns) != 0 { |
76 |
| - cveItems = append(cveItems, vulns...) |
77 |
| - |
78 |
| - i++ |
79 |
| - // Write to disk every ~20,000 vulnerabilities. |
80 |
| - if i == 10 { |
81 |
| - i = 0 |
82 |
| - |
83 |
| - enrichCVEItems(&cveItems, enrichments) |
84 |
| - |
85 |
| - feed := &jsonschema.NVDCVEFeedJSON10{ |
86 |
| - CVEItems: cveItems, |
87 |
| - } |
88 |
| - if err := writeFile(filepath.Join(nvdDir, fmt.Sprintf("%d.json", fileNo)), feed); err != nil { |
89 |
| - return fmt.Errorf("writing to file: %w", err) |
90 |
| - } |
91 |
| - |
92 |
| - fileNo++ |
93 |
| - totalVulns += len(cveItems) |
94 |
| - log.Infof("Loaded %d NVD vulnerabilities", totalVulns) |
95 |
| - // Reduce, reuse, and recycle. |
96 |
| - cveItems = cveItems[:0] |
97 |
| - } |
98 |
| - } |
99 |
| - |
100 |
| - // Rudimentary rate-limiting. |
101 |
| - // NVD limits users without an API key to roughly one call every 6 seconds. |
102 |
| - // With an API key, it is roughly one call every 0.6 seconds. |
103 |
| - // We'll play it safe and do one call every 3 seconds. |
104 |
| - // As of writing there are ~216,000 vulnerabilities, so this whole process should take ~5.4 minutes. |
105 |
| - time.Sleep(3 * time.Second) |
106 |
| - |
107 |
| - startIdx += apiResp.ResultsPerPage |
108 |
| - apiResp, err = query(fmt.Sprintf(urlFmt, startIdx)) |
109 |
| - if err != nil { |
110 |
| - return err |
111 |
| - } |
112 |
| - } |
113 |
| - |
114 |
| - // Write the remaining vulnerabilities. |
115 |
| - if len(cveItems) != 0 { |
116 |
| - enrichCVEItems(&cveItems, enrichments) |
117 |
| - |
118 |
| - feed := &jsonschema.NVDCVEFeedJSON10{ |
119 |
| - CVEItems: cveItems, |
120 |
| - } |
121 |
| - if err := writeFile(filepath.Join(nvdDir, fmt.Sprintf("%d.json", fileNo)), feed); err != nil { |
122 |
| - return fmt.Errorf("writing to file: %w", err) |
123 |
| - } |
124 |
| - |
125 |
| - totalVulns += len(cveItems) |
126 |
| - log.Infof("Loaded %d NVD vulnerabilities", totalVulns) |
127 |
| - } |
128 |
| - |
129 |
| - return nil |
130 |
| -} |
131 |
| - |
132 |
| -func query(url string) (*apischema.CVEAPIJSON20, error) { |
133 |
| - log.Debugf("Querying %s", url) |
134 |
| - req, err := http.NewRequest(http.MethodGet, url, nil) |
135 |
| - if err != nil { |
136 |
| - return nil, fmt.Errorf("creating HTTP request: %w", err) |
137 |
| - } |
138 |
| - req.Header.Set("apiKey", os.Getenv("NVD_API_KEY")) |
139 |
| - |
140 |
| - apiResp, err := queryWithBackoff(req) |
141 |
| - if err != nil { |
142 |
| - return nil, err |
143 |
| - } |
144 |
| - |
145 |
| - return apiResp, nil |
146 |
| -} |
147 |
| - |
148 |
| -func queryWithBackoff(req *http.Request) (*apischema.CVEAPIJSON20, error) { |
149 |
| - var ( |
150 |
| - apiResp *apischema.CVEAPIJSON20 |
151 |
| - err error |
152 |
| - ) |
153 |
| - for i := 1; i <= 5; i++ { |
154 |
| - var resp *http.Response |
155 |
| - resp, err = tryQuery(req) |
156 |
| - if err == nil { |
157 |
| - apiResp, err = parseResponse(resp) |
158 |
| - if err == nil { |
159 |
| - break |
160 |
| - } |
161 |
| - } |
162 |
| - log.Warnf("Failed query attempt %d for %s: %v", i, req.URL.String(), err) |
163 |
| - // Wait some multiple of 3 seconds before next attempt. |
164 |
| - time.Sleep(time.Duration(3*i) * time.Second) |
165 |
| - } |
166 |
| - |
167 |
| - return apiResp, err |
168 |
| -} |
169 |
| - |
170 |
| -func tryQuery(req *http.Request) (*http.Response, error) { |
171 |
| - resp, err := client.Do(req) |
172 |
| - if err != nil { |
173 |
| - return nil, fmt.Errorf("fetching NVD API results: %w", err) |
174 |
| - } |
175 |
| - |
176 |
| - log.Debugf("Queried %s with status code %d", req.URL.String(), resp.StatusCode) |
177 |
| - if resp.StatusCode != 200 { |
178 |
| - utils.IgnoreError(resp.Body.Close) |
179 |
| - return nil, fmt.Errorf("unexpected status code when querying %s: %d", req.URL.String(), resp.StatusCode) |
180 |
| - } |
181 |
| - |
182 |
| - return resp, nil |
183 |
| -} |
184 |
| - |
185 |
| -func parseResponse(resp *http.Response) (*apischema.CVEAPIJSON20, error) { |
186 |
| - defer utils.IgnoreError(resp.Body.Close) |
187 |
| - |
188 |
| - apiResp := new(apischema.CVEAPIJSON20) |
189 |
| - if err := json.NewDecoder(resp.Body).Decode(apiResp); err != nil { |
190 |
| - return nil, fmt.Errorf("decoding API response: %w", err) |
191 |
| - } |
192 |
| - |
193 |
| - return apiResp, nil |
194 |
| -} |
195 |
| - |
196 |
| -func enrichCVEItems(cveItems *[]*jsonschema.NVDCVEFeedJSON10DefCVEItem, enrichments map[string]*FileFormatWrapper) { |
197 |
| - if cveItems == nil { |
198 |
| - return |
199 |
| - } |
200 |
| - |
201 |
| - cves := (*cveItems)[:0] |
202 |
| - for _, item := range *cveItems { |
203 |
| - if _, ok := manuallyEnrichedVulns[item.CVE.CVEDataMeta.ID]; ok { |
204 |
| - log.Warnf("Skipping vuln %s because it is being manually enriched", item.CVE.CVEDataMeta.ID) |
205 |
| - continue |
206 |
| - } |
207 |
| - |
208 |
| - for _, node := range item.Configurations.Nodes { |
209 |
| - removeInvalidCPEs(node) |
210 |
| - } |
211 |
| - |
212 |
| - if enrichedEntry, ok := enrichments[item.CVE.CVEDataMeta.ID]; ok { |
213 |
| - // Add the CPE matches instead of removing for backwards compatibility purposes |
214 |
| - item.Configurations.Nodes = append(item.Configurations.Nodes, &jsonschema.NVDCVEFeedJSON10DefNode{ |
215 |
| - CPEMatch: enrichedEntry.AffectedPackages, |
216 |
| - Operator: "OR", |
217 |
| - }) |
218 |
| - item.LastModifiedDate = enrichedEntry.LastUpdated |
219 |
| - } |
220 |
| - cves = append(cves, item) |
221 |
| - } |
222 |
| - |
223 |
| - for _, item := range manuallyEnrichedVulns { |
224 |
| - cves = append(cves, item) |
225 |
| - } |
226 |
| - |
227 |
| - *cveItems = cves |
228 |
| -} |
229 |
| - |
230 |
| -func removeInvalidCPEs(item *jsonschema.NVDCVEFeedJSON10DefNode) { |
231 |
| - cpeMatches := item.CPEMatch[:0] |
232 |
| - for _, cpeMatch := range item.CPEMatch { |
233 |
| - if cpeMatch.Cpe23Uri == "" { |
234 |
| - cpeMatches = append(cpeMatches, cpeMatch) |
235 |
| - continue |
236 |
| - } |
237 |
| - attr, err := wfn.UnbindFmtString(cpeMatch.Cpe23Uri) |
238 |
| - if err != nil { |
239 |
| - log.Errorf("error parsing %+v", item) |
240 |
| - continue |
241 |
| - } |
242 |
| - if attr.Product == wfn.Any { |
243 |
| - log.Warnf("Filtering out CPE: %+v", attr) |
244 |
| - continue |
245 |
| - } |
246 |
| - cpeMatches = append(cpeMatches, cpeMatch) |
247 |
| - } |
248 |
| - item.CPEMatch = cpeMatches |
249 |
| - for _, child := range item.Children { |
250 |
| - removeInvalidCPEs(child) |
251 |
| - } |
252 |
| -} |
253 |
| - |
254 |
| -func writeFile(path string, feed *jsonschema.NVDCVEFeedJSON10) error { |
255 |
| - outF, err := os.Create(path) |
256 |
| - if err != nil { |
257 |
| - return fmt.Errorf("failed to create file %s: %w", outF.Name(), err) |
258 |
| - } |
259 |
| - defer utils.IgnoreError(outF.Close) |
260 |
| - |
261 |
| - if err := json.NewEncoder(outF).Encode(feed); err != nil { |
262 |
| - return fmt.Errorf("could not encode JSON for %s: %w", outF.Name(), err) |
263 |
| - } |
264 |
| - |
265 |
| - return nil |
266 | 15 | }
|
0 commit comments