11#! /usr/bin/env bash
22#
3- # Scrape a docker image and upload as public or private SBOM file
3+ # Scrape a docker image and upload as public (default) or private SBOM file
44#
55# Preparation:
66#
1313# and note down the CLIENT_ID and SECRET.
1414#
1515# Copy the SECRET generated to the file specified by ${CLIENTSECRET_FILE} below. This
16- # file should reside in a subdirectory with 0600 permissions.
16+ # file should reside in a subdirectory with 0700 permissions.
1717#
1818# Use the CLIENT_ID as the first fixed argument to this script.
1919#
2020
SCRIPTNAME=$(basename "$0")

# Fail early if any external command this script relies on is missing.
# The message names the missing tool so the user knows what to install.
for TOOL in syft jq xq xmllint python3
do
    if ! command -v "$TOOL" > /dev/null 2>&1
    then
        echo >&2 "${TOOL}: command not found - please make sure this tool is on your PATH"
        exit 10
    fi
done
3631
set -e
set -u

# LOGTAG is the PID of this shell; it prefixes every log line.
LOGTAG=$$

# Write one log line to stdout in the form "<pid>:<date>:<message>".
# Arguments: $* - the message words
log () {
    printf '%s:%s:%s\n' "${LOGTAG}" "$(date)" "$*"
}
4439
# Every git lookup below is allowed to fail ('|| true'): this code runs under
# 'set -e', and a missing git setting must not abort the script before the
# -a/-A/-e options can supply the value or the later "Unable to determine ..."
# checks can report what is missing.

# Non-empty when the working tree has uncommitted changes.
GIT_STATUS=$(git status --porcelain) || true

# defaults
FORMAT=cyclonedx
AUTHOR_NAME=$(git config user.name) || true
AUTHOR_EMAIL=$(git config user.email) || true
COMPONENT_AUTHOR_NAME="$AUTHOR_NAME"
SUPPLIER_NAME=dockerhub
SUPPLIER_URL=https://hub.docker.com
TOOL_NAME="$(git config --get remote.origin.url || true) $(git ls-files --full-name "$SCRIPTNAME" || true)"
# Version is the nearest tag description, with a trailing '+' when the tree is
# dirty. Left empty if git cannot describe the tree so that the later check
# reports it instead of accepting a bare "+".
TOOL_VERSION=$(git describe --tags) || true
if [ -n "$TOOL_VERSION" ]
then
    TOOL_VERSION="${TOOL_VERSION}${GIT_STATUS:++}"
fi
TOOL_VENDOR="Jitsuin Inc"
TOOL_HASH_ALG=SHA-256
# Depending on the openssl build, 'dgst' on stdin prints either the bare hex
# digest or a "(stdin)= <hex>" / "SHA2-256(stdin)= <hex>" line; keep only the
# last space-separated field, which is the digest in every variant.
TOOL_HASH_CONTENT=$(openssl dgst -sha256 < "$0") || true
TOOL_HASH_CONTENT=${TOOL_HASH_CONTENT##* }

# credentials directory should have 0700 permissions
CLIENTSECRET_FILE=credentials/client_secret
SBOM=false
PRIVACY=PUBLIC
@@ -55,37 +63,44 @@ URL=https://app.rkvst.io
# Print the help text on stderr and terminate the script with exit status 1.
# Reads the current defaults (SCRIPTNAME, AUTHOR_NAME, COMPONENT_AUTHOR_NAME,
# CLIENTSECRET_FILE, AUTHOR_EMAIL, SBOM, URL) so the text shows effective values.
usage () {
    cat >&2 <<EOF

Create a Cyclone DX 1.2 XML SBOM from a docker image and upload to RKVST SBOM Hub

Usage: $SCRIPTNAME [-a AUTHOR_NAME] [-A COMPONENT_AUTHOR_NAME] [-c CLIENT_SECRET_FILE] [-e AUTHOR_EMAIL] [-h] [-s] [-p] [-u URL] CLIENT_ID [docker-image:tag|sbom file]

    -a AUTHOR_NAME            name of the author of the SBOM. Default ($AUTHOR_NAME)
    -A COMPONENT_AUTHOR_NAME  name of the author of the docker image. Default ($COMPONENT_AUTHOR_NAME)
    -c CLIENT_SECRET_FILE     containing client secret (default ${CLIENTSECRET_FILE})
    -e AUTHOR_EMAIL           email address of the author of the SBOM. Default ($AUTHOR_EMAIL)
    -h                        show this help
    -s                        if specified the second argument is an sbom file.
                              Default ($SBOM)
    -p                        upload private SBOM
    -u URL                    URL of archivist SBOM hub. Default ($URL)

Examples:

    $0 29b48af4-45ca-465b-b136-206674f8aa9b ubuntu:21.10
    $0 -s 29b48af4-45ca-465b-b136-206674f8aa9b ./my-sbom.xml

EOF

    exit 1
}
7788
78- while getopts " c:ho:psu :" o; do
89+ while getopts " a:A:c:e:hpsu :" o; do
7990 case " ${o} " in
91+ a) AUTHOR_NAME=" ${OPTARG} "
92+ ;;
93+ A) COMPONENT_AUTHOR_NAME=" ${OPTARG} "
94+ ;;
8095 c) CLIENTSECRET_FILE=" ${OPTARG} "
8196 ;;
82- o) FORMAT= ${OPTARG}
97+ e) AUTHOR_EMAIL= " ${OPTARG} "
8398 ;;
8499 p) PRIVACY=PRIVATE
85100 ;;
86101 s) SBOM=true
87102 ;;
88- u) URL=$OPTARG
103+ u) URL=" $OPTARG "
89104 ;;
90105 * )
91106 usage
@@ -133,19 +148,179 @@ SECRET=$(cat "${CLIENTSECRET_FILE}")
133148# ----------------------------------------------------------------------------
# Pick the SBOM to work on: generate one from the docker image with syft, or,
# when SBOM is anything other than "false", treat the positional argument held
# in DOCKER_IMAGE as the path of an existing SBOM file.
case "${SBOM}" in
    false)
        log "Scrape ${FORMAT} SBOM from ${DOCKER_IMAGE} to ${OUTFILE} ..."
        OUTPUT="${TEMPDIR}/${OUTFILE}"
        syft -q packages -o "${FORMAT}" "${DOCKER_IMAGE}" > "${OUTPUT}"
        ;;
    *)
        OUTPUT="${DOCKER_IMAGE}"
        ;;
esac
142157
# ----------------------------------------------------------------------------
# Update SBOM including NTIA minimum elements
# ----------------------------------------------------------------------------
# The component name is split at ':' into name and version, and the component
# version is split at ':' into hash algorithm and hash content (presumably
# syft records "image:tag" as the name and the image digest "alg:hex" as the
# version - confirm against syft output).
ORIG_COMPONENT_NAME=$(xq -r .bom.metadata.component.name "$OUTPUT")
ORIG_COMPONENT_VERSION=$(xq -r .bom.metadata.component.version "$OUTPUT")
COMPONENT_NAME=${ORIG_COMPONENT_NAME%%:*}
COMPONENT_VERSION=${ORIG_COMPONENT_NAME##*:}
HASH_ALG="${ORIG_COMPONENT_VERSION%%:*}"
# Map the digest prefix onto the spelling used by the CycloneDX 'alg' attribute.
case ${HASH_ALG^^} in
    SHA256) COMPONENT_HASH_ALG="SHA-256"
            ;;
    SHA384) COMPONENT_HASH_ALG="SHA-384"
            ;;
    SHA512) COMPONENT_HASH_ALG="SHA-512"
            ;;
    *)      # Stop here: continuing would leave COMPONENT_HASH_ALG unset and
            # the script would die later with an "unbound variable" error.
            echo >&2 "Unknown hash algorithm $HASH_ALG"
            exit 1
            ;;
esac
COMPONENT_HASH_CONTENT="${ORIG_COMPONENT_VERSION##*:}"
172+
# Print, on stdout, a summary of the metadata that is about to be written into
# the SBOM. COMPONENT_HASH_ALG is expanded with a default because it is the one
# value that can still be unset here, and 'set -u' would otherwise abort the
# script before the emptiness checks get a chance to report it.
print_sbom_metadata () {
    cat <<EOF
metadata:
  tools:
    tool:
      vendor: $TOOL_VENDOR
      name: $TOOL_NAME
      version: $TOOL_VERSION
      hashes:
        hash:
          alg: $TOOL_HASH_ALG
          content: $TOOL_HASH_CONTENT
  authors:
    author:
      name: $AUTHOR_NAME
      email: $AUTHOR_EMAIL
  component:
    supplier:
      name: $SUPPLIER_NAME
      url: $SUPPLIER_URL
    author: $COMPONENT_AUTHOR_NAME
    name: $ORIG_COMPONENT_NAME -> $COMPONENT_NAME
    version: $ORIG_COMPONENT_VERSION -> $COMPONENT_VERSION
    hashes:
      hash:
        alg: ${COMPONENT_HASH_ALG:-}
        content: $COMPONENT_HASH_CONTENT
EOF
}

print_sbom_metadata
198+
# Terminate the script with exit status 1 when a required value is empty.
# Arguments: $1 - the value to check
#            $2 - what the value is, used to complete "Unable to determine ..."
require_value () {
    if [ -z "$1" ]
    then
        echo >&2 "Unable to determine $2"
        exit 1
    fi
}

require_value "$TOOL_VENDOR"            "SBOM tool vendor"
require_value "$TOOL_NAME"              "SBOM tool name"
require_value "$TOOL_VERSION"           "SBOM tool version"
require_value "$TOOL_HASH_ALG"          "SBOM tool hash algorithm"
require_value "$TOOL_HASH_CONTENT"      "SBOM tool hash content"
require_value "$AUTHOR_NAME"            "SBOM author name"
require_value "$AUTHOR_EMAIL"           "SBOM author email"
require_value "$SUPPLIER_NAME"          "component supplier name"
require_value "$SUPPLIER_URL"           "component supplier url"
require_value "$COMPONENT_AUTHOR_NAME"  "component author name"
require_value "$COMPONENT_NAME"         "component name"
require_value "$COMPONENT_VERSION"      "component version"
# Expanded with a default: this variable is unset (not merely empty) when the
# hash algorithm was not recognised, and 'set -u' would otherwise abort with
# "unbound variable" instead of the message below.
require_value "${COMPONENT_HASH_ALG:-}" "component hash algorithm"
require_value "$COMPONENT_HASH_CONTENT" "component hash content"
213+
PATCHED_OUTPUT="${OUTPUT}.patched"

# Read a CycloneDX 1.2 XML SBOM on stdin and write a patched copy on stdout
# with tool, author, supplier, component name/version and component hash
# metadata filled in from the TOOL_*, AUTHOR_*, SUPPLIER_* and COMPONENT_*
# shell variables.
#
# The values are handed to Python through the environment and the heredoc
# delimiter is quoted, so no shell value is ever spliced into Python source:
# a name such as O'Brien can neither break nor inject code.
patch_sbom () {
    TOOL_VENDOR="$TOOL_VENDOR" \
    TOOL_NAME="$TOOL_NAME" \
    TOOL_VERSION="$TOOL_VERSION" \
    TOOL_HASH_ALG="$TOOL_HASH_ALG" \
    TOOL_HASH_CONTENT="$TOOL_HASH_CONTENT" \
    AUTHOR_NAME="$AUTHOR_NAME" \
    AUTHOR_EMAIL="$AUTHOR_EMAIL" \
    COMPONENT_AUTHOR_NAME="$COMPONENT_AUTHOR_NAME" \
    COMPONENT_NAME="$COMPONENT_NAME" \
    COMPONENT_VERSION="$COMPONENT_VERSION" \
    COMPONENT_HASH_ALG="$COMPONENT_HASH_ALG" \
    COMPONENT_HASH_CONTENT="$COMPONENT_HASH_CONTENT" \
    SUPPLIER_NAME="$SUPPLIER_NAME" \
    SUPPLIER_URL="$SUPPLIER_URL" \
    python3 <(cat <<'END'
import os
import sys
import xml.etree.ElementTree as ET

NS = 'http://cyclonedx.org/schema/bom/1.2'
ENV = os.environ


def q(tag):
    """Return tag qualified with the CycloneDX 1.2 namespace."""
    return '{%s}%s' % (NS, tag)


def indent(elem, level=0):
    """Rewrite whitespace-only text/tail so the tree serialises indented."""
    pad = "\n" + level * "  "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = pad + "  "
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
        last = None
        for last in elem:
            indent(last, level + 1)
        # The last child closes the parent, so it dedents to the parent level.
        if not last.tail or not last.tail.strip():
            last.tail = pad
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = pad


def child(parent, tag, index=None):
    """Return parent's existing child, creating it when absent.

    The test is 'is None', not truthiness: an Element without children is
    falsy, so 'if not elem' would duplicate an existing leaf element.
    A new child is appended, or inserted at index when one is given.
    """
    found = parent.find(q(tag))
    if found is None:
        found = ET.Element(q(tag))
        if index is None:
            parent.append(found)
        else:
            parent.insert(index, found)
    return found


def add(parent, tag, text=None, **attrib):
    """Append a new child with the given text and attributes; return it."""
    elem = ET.SubElement(parent, q(tag), attrib)
    elem.text = text
    return elem


# Serialise the CycloneDX namespace as the default (unprefixed) namespace.
ET.register_namespace('', NS)

# Open original file
et = ET.parse(sys.stdin)
root = et.getroot()

metadata = root.find(q('metadata'))

# Add this tool
tool = add(child(metadata, 'tools'), 'tool')
add(tool, 'vendor', ENV['TOOL_VENDOR'])
add(tool, 'name', ENV['TOOL_NAME'])
add(tool, 'version', ENV['TOOL_VERSION'])
add(add(tool, 'hashes'), 'hash', ENV['TOOL_HASH_CONTENT'], alg=ENV['TOOL_HASH_ALG'])

# Add sbom authors elements (a new <authors> goes in at position 2)
author = add(child(metadata, 'authors', 2), 'author')
add(author, 'name', ENV['AUTHOR_NAME'])
add(author, 'email', ENV['AUTHOR_EMAIL'])

component = metadata.find(q('component'))

# Update component author
child(component, 'author', 0).text = ENV['COMPONENT_AUTHOR_NAME']

# Update component name and version
component.find(q('name')).text = ENV['COMPONENT_NAME']
component.find(q('version')).text = ENV['COMPONENT_VERSION']

# Update component hash
add(child(component, 'hashes'), 'hash', ENV['COMPONENT_HASH_CONTENT'],
    alg=ENV['COMPONENT_HASH_ALG'])

# Add component supplier (inserted ahead of the author)
supplier = child(component, 'supplier', 0)
child(supplier, 'name').text = ENV['SUPPLIER_NAME']
child(supplier, 'url').text = ENV['SUPPLIER_URL']

# Add supplier (it appears twice in the schema)
supplier = child(metadata, 'supplier')
child(supplier, 'name').text = ENV['SUPPLIER_NAME']
child(supplier, 'url').text = ENV['SUPPLIER_URL']

indent(root)

et.write(sys.stdout, encoding='unicode', xml_declaration=True)
END
    )
}

patch_sbom < "$OUTPUT" > "$PATCHED_OUTPUT"
305+
# ----------------------------------------------------------------------------
# Check that the patched SBOM is valid against the cyclonedx schema
# ----------------------------------------------------------------------------
[ -f spdx.xsd ] || curl -fsS -o spdx.xsd https://cyclonedx.org/schema/spdx
[ -f cyclonedx.xsd ] || curl -fsS -o cyclonedx.xsd https://cyclonedx.org/schema/bom/1.2

# xmllint complains about a double import of the spdx schema, but we have to import via
# the wrapper to set the schema location to a local file, as xmllint fails to download
# them from the internet as they are https
#
# xmllint's status is captured directly. Reading PIPESTATUS after a '[' test
# does not work: the test itself overwrites PIPESTATUS, so the script would
# exit 0 on a validation failure.
XMLLINT_STATUS=0
XMLLINT_OUTPUT=$(xmllint "$PATCHED_OUTPUT" --schema cyclonedx-wrapper.xsd --noout 2>&1) || XMLLINT_STATUS=$?
if [ -n "$XMLLINT_OUTPUT" ]
then
    # grep exits 1 when every line was filtered out; that is not an error
    # here and must not trip 'set -e'.
    grep -Fv "Skipping import of schema located at 'http://cyclonedx.org/schema/spdx' for the namespace 'http://cyclonedx.org/schema/spdx'" <<< "$XMLLINT_OUTPUT" || true
fi
if [ "$XMLLINT_STATUS" -ne 0 ]
then
    exit "$XMLLINT_STATUS"
fi
317+
143318# ----------------------------------------------------------------------------
144319# Handle client id and secrets for SBOM scraper via App registrations
145320# ----------------------------------------------------------------------------
146321HTTP_STATUS=" "
147322# get token
148- log " Get token"
323+ log " Get token ... "
149324HTTP_STATUS=$( curl -sS -w " %{http_code}" \
150325 -o " ${TEMPDIR} /access_token" \
151326 --data-urlencode " grant_type=client_credentials" \
@@ -169,13 +344,13 @@ EOF
169344# ----------------------------------------------------------------------------
170345# Upload SBOM
171346# ----------------------------------------------------------------------------
172- log " Upload ${PRIVACY} ${OUTPUT} "
347+ log " Upload ${PRIVACY} ${OUTPUT} ... "
173348
174349HTTP_STATUS=$( curl -s -w " %{http_code}" -X POST \
175350 -o " ${TEMPDIR} /upload" \
176351 -H " @${BEARER_TOKEN_FILE} " \
177352 -H " content_type=text/xml" \
178- -F " sbom=@${OUTPUT } " \
353+ -F " sbom=@${PATCHED_OUTPUT } " \
179354 " ${URL} /archivist/v1/sboms?privacy=${PRIVACY} " )
180355
181356if [ " ${HTTP_STATUS} " != " 200" ]
184359 exit 4
185360fi
186361log " Upload success: "
187- ${JQ} " ${TEMPDIR} /upload"
362+ jq . " ${TEMPDIR} /upload"
188363exit 0
0 commit comments