Skip to content

Commit 6dd4a86

Browse files
jshartleyj-hartley
authored andcommitted
Add NTIA minimum SBOM elements
Problem: The SBOM generated by syft does not contain all of the minimum data elements recommended by the NTIA as SBOM best practice, see: https://www.ntia.doc.gov/report/2021/minimum-elements-software-bill-materials-sbom Solution: Patch the missing elements into the SBOM prior to uploading. Validate the patched SBOM against the CycloneDX 1.2 XML schema to make sure it is correctly constructed. Signed-off-by: John Hartley
1 parent 7bc34ff commit 6dd4a86

File tree

3 files changed

+218
-35
lines changed

3 files changed

+218
-35
lines changed

scripts/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
cyclonedx.xsd
2+
spdx.xsd
3+
credentials

scripts/cyclonedx-wrapper.xsd

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<schema elementFormDefault="qualified" xmlns="http://www.w3.org/2001/XMLSchema">
3+
<import namespace="http://cyclonedx.org/schema/spdx" schemaLocation="spdx.xsd"/>
4+
<import namespace="http://cyclonedx.org/schema/bom/1.2" schemaLocation="cyclonedx.xsd"/>
5+
</schema>

scripts/sbom_scraper.sh

Lines changed: 210 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22
#
3-
# Scrape a docker image and upload as public or private SBOM file
3+
# Scrape a docker image and upload as public (default) or private SBOM file
44
#
55
# Preparation:
66
#
@@ -13,39 +13,47 @@
1313
# and note down the CLIENT_ID and SECRET.
1414
#
1515
# Copy the SECRET generated to the file specified by ${CLIENTSECRET_FILE} below. This
16-
# file should reside in a subdirectory with 0600 permissions.
16+
# file should reside in a subdirectory with 0700 permissions.
1717
#
1818
# Use the CLIENT_ID as the first fixed argument to this script.
1919
#
2020

2121
SCRIPTNAME=$(basename "$0")
2222

23-
SYFT=$(which syft)
24-
if [ -z "${SYFT}" ]
25-
then
26-
echo "syft command not found"
27-
exit 10
28-
fi
29-
JQ=$(which jq)
30-
if [ -z "${JQ}" ]
31-
then
32-
JQ="cat"
33-
else
34-
JQ="jq ."
35-
fi
23+
# Verify up front that every external command this script invokes is available,
# so that a missing dependency is reported by name before any work is done.
# git, openssl and curl are included because the script calls them further down.
# Exits with status 10 on the first tool that cannot be found.
for TOOL in syft jq xq xmllint python3 git openssl curl
do
    if ! command -v "$TOOL" > /dev/null 2>&1
    then
        echo >&2 "$TOOL not found: please make sure this tool is on your PATH"
        exit 10
    fi
done
3631

3732
set -e
3833
set -u
3934

4035
LOGTAG=$$
4136
log() {
42-
echo "${LOGTAG}:$(date --rfc-3339=seconds):$* ..."
37+
echo "${LOGTAG}:$(date ):$*"
4338
}
4439

40+
GIT_STATUS=$(git status --porcelain)
41+
4542
# defaults
4643
FORMAT=cyclonedx
44+
AUTHOR_NAME="$(git config user.name)"
45+
AUTHOR_EMAIL="$(git config user.email)"
46+
COMPONENT_AUTHOR_NAME="$AUTHOR_NAME"
47+
SUPPLIER_NAME=dockerhub
48+
SUPPLIER_URL=https://hub.docker.com
49+
TOOL_NAME="$(git config --get remote.origin.url) $(git ls-files --full-name "$SCRIPTNAME")"
50+
TOOL_VERSION=$(git describe --tags)${GIT_STATUS:++}
51+
TOOL_VENDOR="Jitsuin Inc"
52+
TOOL_HASH_ALG=SHA-256
53+
# SHA-256 digest of this script itself, recorded in the SBOM as the tool hash.
# When reading stdin, openssl builds may prefix the digest with a label such as
# "(stdin)= " or "SHA2-256(stdin)= "; strip everything up to the last space so
# that only the hex digest remains (a value without a space is left unchanged).
TOOL_HASH_CONTENT=$(openssl dgst -sha256 < "$0")
TOOL_HASH_CONTENT=${TOOL_HASH_CONTENT##* }
4755

48-
# credentials directory has 0600 permissions
56+
# credentials directory should have 0700 permissions
4957
CLIENTSECRET_FILE=credentials/client_secret
5058
SBOM=false
5159
PRIVACY=PUBLIC
@@ -55,37 +63,44 @@ URL=https://app.rkvst.io
5563
#######################################
# Print the command line help to stderr and terminate the script.
# Globals (read): SCRIPTNAME, AUTHOR_NAME, COMPONENT_AUTHOR_NAME,
#                 CLIENTSECRET_FILE, AUTHOR_EMAIL, SBOM, URL
# Arguments: none
# Outputs:   help text on stderr; the current defaults are expanded into it
# Returns:   never returns - exits the script with status 1
#######################################
usage() {
    # -A sets COMPONENT_AUTHOR_NAME, so the synopsis names it that way.
    cat >&2 <<EOF

Create a Cyclone DX 1.2 XML SBOM from a docker image and upload to RKVST SBOM Hub

Usage: $SCRIPTNAME [-a AUTHOR_NAME] [-A COMPONENT_AUTHOR_NAME] [-c CLIENT_SECRET_FILE] [-e AUTHOR_EMAIL] [-s] [-p] [-u URL] CLIENT_ID [docker-image:tag|sbom file]

-a AUTHOR name of the author of the SBOM. Default ($AUTHOR_NAME)
-A COMPONENT_AUTHOR name of the author of the docker image. Default ($COMPONENT_AUTHOR_NAME)
-c CLIENT_SECRET_FILE containing client secret (default ${CLIENTSECRET_FILE})
-e AUTHOR_EMAIL email address of the author of the SBOM. Default ($AUTHOR_EMAIL)
-s if specified the second argument is an sbom file.
Default ($SBOM)
-p upload private SBOM
-u URL URL of archivist SBOM hub. Default ($URL)

Examples:

$0 29b48af4-45ca-465b-b136-206674f8aa9b ubuntu:21.10
$0 -s 29b48af4-45ca-465b-b136-206674f8aa9b ./my-sbom.xml

EOF

    exit 1
}
7788

78-
while getopts "c:ho:psu:" o; do
89+
while getopts "a:A:c:e:hpsu:" o; do
7990
case "${o}" in
91+
a) AUTHOR_NAME="${OPTARG}"
92+
;;
93+
A) COMPONENT_AUTHOR_NAME="${OPTARG}"
94+
;;
8095
c) CLIENTSECRET_FILE="${OPTARG}"
8196
;;
82-
o) FORMAT=${OPTARG}
97+
e) AUTHOR_EMAIL="${OPTARG}"
8398
;;
8499
p) PRIVACY=PRIVATE
85100
;;
86101
s) SBOM=true
87102
;;
88-
u) URL=$OPTARG
103+
u) URL="$OPTARG"
89104
;;
90105
*)
91106
usage
@@ -133,19 +148,179 @@ SECRET=$(cat "${CLIENTSECRET_FILE}")
133148
# ----------------------------------------------------------------------------
134149
if [ "${SBOM}" = "false" ]
135150
then
136-
log "Scrape ${FORMAT} SBOM from ${DOCKER_IMAGE} to ${OUTFILE}..."
151+
log "Scrape ${FORMAT} SBOM from ${DOCKER_IMAGE} to ${OUTFILE} ..."
137152
OUTPUT="${TEMPDIR}/${OUTFILE}"
138-
${SYFT} -q packages -o "${FORMAT}" "${DOCKER_IMAGE}"> "${OUTPUT}"
153+
syft -q packages -o "${FORMAT}" "${DOCKER_IMAGE}"> "${OUTPUT}"
139154
else
140155
OUTPUT="${DOCKER_IMAGE}"
141156
fi
142157

158+
# ----------------------------------------------------------------------------
159+
# Update SBOM including NTIA minimum elements
160+
# ----------------------------------------------------------------------------
161+
# Read the component name and version recorded in the SBOM metadata.
ORIG_COMPONENT_NAME=$(xq -r .bom.metadata.component.name "$OUTPUT")
ORIG_COMPONENT_VERSION=$(xq -r .bom.metadata.component.version "$OUTPUT")

# The name is split at ':' into the component name (before the first ':') and
# the component version (after the last ':').
# NOTE(review): this presumes a name of the form image:tag - confirm.
COMPONENT_NAME=${ORIG_COMPONENT_NAME%%:*}
COMPONENT_VERSION=${ORIG_COMPONENT_NAME##*:}

# The original version is split at ':' into a hash algorithm and hash content.
# NOTE(review): this presumes a version of the form alg:digest - confirm.
HASH_ALG="${ORIG_COMPONENT_VERSION%%:*}"
case ${HASH_ALG^^} in
    SHA256) COMPONENT_HASH_ALG="SHA-256"
            ;;
    *)      # Stop here: COMPONENT_HASH_ALG would otherwise stay unset and the
            # script would abort later with an "unbound variable" error.
            echo >&2 "Unknown hash algorithm $HASH_ALG"
            exit 1
            ;;
esac
COMPONENT_HASH_CONTENT="${ORIG_COMPONENT_VERSION##*:}"
172+
173+
echo "metadata:"
174+
echo " tools:"
175+
echo " tool:"
176+
echo " vendor: $TOOL_VENDOR"
177+
echo " name: $TOOL_NAME"
178+
echo " version: $TOOL_VERSION"
179+
echo " hashes:"
180+
echo " hash:"
181+
echo " alg: $TOOL_HASH_ALG"
182+
echo " content: $TOOL_HASH_CONTENT"
183+
echo " authors:"
184+
echo " author:"
185+
echo " name: $AUTHOR_NAME"
186+
echo " email: $AUTHOR_EMAIL"
187+
echo " component:"
188+
echo " supplier:"
189+
echo " name: $SUPPLIER_NAME"
190+
echo " url: $SUPPLIER_URL"
191+
echo " author: $COMPONENT_AUTHOR_NAME"
192+
echo " name: $ORIG_COMPONENT_NAME -> $COMPONENT_NAME"
193+
echo " version: $ORIG_COMPONENT_VERSION -> $COMPONENT_VERSION"
194+
echo " hashes:"
195+
echo " hash:"
196+
echo " alg: $COMPONENT_HASH_ALG"
197+
echo " content: $COMPONENT_HASH_CONTENT"
198+
199+
[ -z "$TOOL_VENDOR" ] && echo >&2 "Unable to determine SBOM tool vendor" && exit 1
200+
[ -z "$TOOL_NAME" ] && echo >&2 "Unable to determine SBOM tool name" && exit 1
201+
[ -z "$TOOL_VERSION" ] && echo >&2 "Unable to determine SBOM tool version" && exit 1
202+
[ -z "$TOOL_HASH_ALG" ] && echo >&2 "Unable to determine SBOM tool hash algorithm" && exit 1
203+
[ -z "$TOOL_HASH_CONTENT" ] && echo >&2 "Unable to determine SBOM tool hash content" && exit 1
204+
[ -z "$AUTHOR_NAME" ] && echo >&2 "Unable to determine SBOM author name" && exit 1
205+
[ -z "$AUTHOR_EMAIL" ] && echo >&2 "Unable to determine SBOM author email" && exit 1
206+
[ -z "$SUPPLIER_NAME" ] && echo >&2 "Unable to determine component supplier name" && exit 1
207+
[ -z "$SUPPLIER_URL" ] && echo >&2 "Unable to determine component supplier url" && exit 1
208+
[ -z "$COMPONENT_AUTHOR_NAME" ] && echo >&2 "Unable to determine component author name" && exit 1
209+
[ -z "$COMPONENT_NAME" ] && echo >&2 "Unable to determine component name" && exit 1
210+
[ -z "$COMPONENT_VERSION" ] && echo >&2 "Unable to determine component version" && exit 1
211+
[ -z "$COMPONENT_HASH_ALG" ] && echo >&2 "Unable to determine component hash algorithm" && exit 1
212+
[ -z "$COMPONENT_HASH_CONTENT" ] && echo >&2 "Unable to determine component hash content" && exit 1
213+
214+
PATCHED_OUTPUT="${OUTPUT}.patched"
215+
216+
# Patch the NTIA minimum elements into the SBOM: the XML is read from $OUTPUT
# on stdin and the patched document is written to $PATCHED_OUTPUT.
#
# The values are handed to python through its environment and the here-doc
# delimiter is quoted, so the shell never splices them into the python source.
# A quote or backslash in, for example, the author name can therefore neither
# break nor inject code into the program.
TOOL_VENDOR="$TOOL_VENDOR" \
TOOL_NAME="$TOOL_NAME" \
TOOL_VERSION="$TOOL_VERSION" \
TOOL_HASH_ALG="$TOOL_HASH_ALG" \
TOOL_HASH_CONTENT="$TOOL_HASH_CONTENT" \
AUTHOR_NAME="$AUTHOR_NAME" \
AUTHOR_EMAIL="$AUTHOR_EMAIL" \
COMPONENT_AUTHOR_NAME="$COMPONENT_AUTHOR_NAME" \
COMPONENT_NAME="$COMPONENT_NAME" \
COMPONENT_VERSION="$COMPONENT_VERSION" \
COMPONENT_HASH_ALG="$COMPONENT_HASH_ALG" \
COMPONENT_HASH_CONTENT="$COMPONENT_HASH_CONTENT" \
SUPPLIER_NAME="$SUPPLIER_NAME" \
SUPPLIER_URL="$SUPPLIER_URL" \
python3 <(cat <<'END'
import os
import sys
import xml.etree.ElementTree as ET

# Values supplied by the calling shell script.
env = os.environ


def indent(elem, level=0):
    """Add whitespace to text/tail of elem and its descendants, in place,
    so that the serialized tree is written one element per line."""
    i = "\n" + level * " "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for child in elem:
            indent(child, level + 1)
        # The last child is followed by the parent's closing tag, which sits
        # at the parent's own indentation level.
        if not child.tail or not child.tail.strip():
            child.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i


ET.register_namespace('', 'http://cyclonedx.org/schema/bom/1.2')
ns = {'': 'http://cyclonedx.org/schema/bom/1.2'}

# Open original file
et = ET.parse(sys.stdin)
root = et.getroot()

metadata = root.find('metadata', ns)

# NOTE: existence is tested with "is None" throughout. An Element without
# child elements is falsy, so "if not element" would also be true for an
# element that exists but has no children and a duplicate would be created.

# Add this tool
tools = metadata.find('tools', ns)
if tools is None:
    tools = ET.SubElement(metadata, 'tools')
tool = ET.SubElement(tools, 'tool')
ET.SubElement(tool, 'vendor').text = env['TOOL_VENDOR']
ET.SubElement(tool, 'name').text = env['TOOL_NAME']
ET.SubElement(tool, 'version').text = env['TOOL_VERSION']
hashes = ET.SubElement(tool, 'hashes')
hash_elem = ET.SubElement(hashes, 'hash', alg=env['TOOL_HASH_ALG'])
hash_elem.text = env['TOOL_HASH_CONTENT']

# Add sbom authors elements
authors = metadata.find('authors', ns)
if authors is None:
    authors = ET.Element('authors')
    metadata.insert(2, authors)
author = ET.SubElement(authors, 'author')
ET.SubElement(author, 'name').text = env['AUTHOR_NAME']
ET.SubElement(author, 'email').text = env['AUTHOR_EMAIL']

component = metadata.find('component', ns)

# Update component author
author = component.find('author', ns)
if author is None:
    author = ET.Element('author')
    component.insert(0, author)
author.text = env['COMPONENT_AUTHOR_NAME']

# Update component name and version
component.find('name', ns).text = env['COMPONENT_NAME']
component.find('version', ns).text = env['COMPONENT_VERSION']

# Update component hash
hashes = component.find('hashes', ns)
if hashes is None:
    hashes = ET.SubElement(component, 'hashes')
hash_elem = ET.SubElement(hashes, 'hash', alg=env['COMPONENT_HASH_ALG'])
hash_elem.text = env['COMPONENT_HASH_CONTENT']

# Add component supplier
supplier = component.find('supplier', ns)
if supplier is None:
    supplier = ET.Element('supplier')
    component.insert(0, supplier)
ET.SubElement(supplier, 'name').text = env['SUPPLIER_NAME']
ET.SubElement(supplier, 'url').text = env['SUPPLIER_URL']

# Add supplier (it appears twice in the schema)
supplier = metadata.find('supplier', ns)
if supplier is None:
    supplier = ET.SubElement(metadata, 'supplier')
ET.SubElement(supplier, 'name').text = env['SUPPLIER_NAME']
ET.SubElement(supplier, 'url').text = env['SUPPLIER_URL']

indent(root)

et.write(sys.stdout, encoding='unicode', xml_declaration=True, default_namespace='')
END
) < "$OUTPUT" > "$PATCHED_OUTPUT"
305+
306+
# ----------------------------------------------------------------------------
307+
# Check that the patched SBOM is valid against the cyclonedx schema
308+
# ----------------------------------------------------------------------------
309+
[ -f spdx.xsd ] || curl -fsS -o spdx.xsd https://cyclonedx.org/schema/spdx
310+
[ -f cyclonedx.xsd ] || curl -fsS -o cyclonedx.xsd https://cyclonedx.org/schema/bom/1.2
311+
312+
# xmllint complains about a double import of the spdx schema, but we have to import via
313+
# the wrapper to set the schema location to a local file, as xmllint fails to download
314+
# them from the internet as they are https
315+
# Validate the patched SBOM, filtering out the known harmless warning about the
# double import of the spdx schema.
xmllint "$PATCHED_OUTPUT" --schema cyclonedx-wrapper.xsd --noout 2>&1 | grep -Fv "Skipping import of schema located at 'http://cyclonedx.org/schema/spdx' for the namespace 'http://cyclonedx.org/schema/spdx'"
# Capture xmllint's status straight away: PIPESTATUS is overwritten by every
# subsequent command, including the test below, so reading it a second time
# would yield the status of the test rather than that of xmllint.
XMLLINT_STATUS="${PIPESTATUS[0]}"
if [ "${XMLLINT_STATUS}" -ne 0 ]
then
    exit "${XMLLINT_STATUS}"
fi
317+
143318
# ----------------------------------------------------------------------------
144319
# Handle client id and secrets for SBOM scraper via App registrations
145320
# ----------------------------------------------------------------------------
146321
HTTP_STATUS=""
147322
# get token
148-
log "Get token"
323+
log "Get token ..."
149324
HTTP_STATUS=$(curl -sS -w "%{http_code}" \
150325
-o "${TEMPDIR}/access_token" \
151326
--data-urlencode "grant_type=client_credentials" \
@@ -169,13 +344,13 @@ EOF
169344
# ----------------------------------------------------------------------------
170345
# Upload SBOM
171346
# ----------------------------------------------------------------------------
172-
log "Upload ${PRIVACY} ${OUTPUT}"
347+
log "Upload ${PRIVACY} ${OUTPUT} ..."
173348

174349
HTTP_STATUS=$(curl -s -w "%{http_code}" -X POST \
175350
-o "${TEMPDIR}/upload" \
176351
-H "@${BEARER_TOKEN_FILE}" \
177352
-H "content_type=text/xml" \
178-
-F "sbom=@${OUTPUT}" \
353+
-F "sbom=@${PATCHED_OUTPUT}" \
179354
"${URL}/archivist/v1/sboms?privacy=${PRIVACY}")
180355

181356
if [ "${HTTP_STATUS}" != "200" ]
@@ -184,5 +359,5 @@ then
184359
exit 4
185360
fi
186361
log "Upload success: "
187-
${JQ} "${TEMPDIR}/upload"
362+
jq . "${TEMPDIR}/upload"
188363
exit 0

0 commit comments

Comments
 (0)