Skip to content

Commit 4ecb604

Browse files
committed
Upload SBOM from jar file
Problem: Deriving SBOMs from jar files requires extra steps and extra consolidation of internal fields. Solution: If the source is a jar URL, fetch the jar and run syft on it with the 'file:' qualifier. Conditionally modify fields in the generated SBOM to comply with NTIA requirements. Signed-off-by: Paul Hewlett <[email protected]>
1 parent 2766f45 commit 4ecb604

File tree

1 file changed

+126
-86
lines changed

1 file changed

+126
-86
lines changed

scripts/sbom_scraper.sh

Lines changed: 126 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -41,39 +41,27 @@ log() {
4141
# ----------------------------------------------------------------------------
4242
# Option parsing
4343
# ----------------------------------------------------------------------------
44+
TOOL_NAME="https://github.com/jitsuin-inc/archivist-shell $SCRIPTNAME"
45+
#
46+
# Set this value just before release
47+
TOOL_VERSION="v0.3.5"
48+
TOOL_VENDOR="Jitsuin Inc"
49+
TOOL_HASH_ALG=SHA-256
50+
TOOL_HASH_CONTENT=$(shasum -a 256 "$0" | cut -d' ' -f1)
4451

45-
# Prepare defaults
46-
pushd "$SCRIPTDIR" > /dev/null
47-
if type git > /dev/null 2>&1 && git rev-parse --git-dir > /dev/null 2>&1
48-
then
49-
# we are in a git repo so set defaults using git
50-
GIT_STATUS=$(git status --porcelain)
51-
52-
AUTHOR_NAME="$(git config user.name || echo "$USER")"
53-
AUTHOR_EMAIL="$(git config user.email || true)"
54-
TOOL_NAME="$(git config --get remote.origin.url) $(git ls-files --full-name "$SCRIPTNAME")"
55-
TOOL_VERSION=$(git describe --tags)${GIT_STATUS:++}
56-
else
57-
AUTHOR_NAME="$USER"
58-
AUTHOR_EMAIL=""
59-
TOOL_NAME="$SCRIPTNAME"
60-
TOOL_VERSION="unknown"
61-
fi
62-
popd > /dev/null
52+
DEFAULT_AUTHOR_NAME="$USER"
53+
AUTHOR_NAME="$DEFAULT_AUTHOR_NAME"
54+
AUTHOR_EMAIL=""
6355

6456
FORMAT=cyclonedx
65-
COMPONENT_AUTHOR_NAME="$AUTHOR_NAME"
66-
SUPPLIER_NAME=dockerhub
67-
SUPPLIER_URL=https://hub.docker.com
68-
TOOL_VENDOR="Jitsuin Inc"
69-
TOOL_HASH_ALG=SHA-256
57+
COMPONENT_AUTHOR_NAME="$DEFAULT_AUTHOR_NAME"
7058
SBOM_UPLOAD_TIMEOUT=10
7159
# shellcheck disable=SC2002
72-
TOOL_HASH_CONTENT=$(shasum -a 256 "$0" | cut -d' ' -f1)
7360
# credentials directory should have 0700 permissions
7461
CLIENTSECRET_FILE=$SCRIPTDIR/../credentials/client_secret
7562
SBOM=false
7663
PRIVACY=PUBLIC
64+
JARFILE=false
7765

7866
URL=https://app.rkvst.io
7967

@@ -82,7 +70,7 @@ usage() {
8270
8371
Create a Cyclone DX 1.2 XML SBOM from a docker image and upload to RKVST SBOM Hub
8472
85-
Usage: $SCRIPTNAME [-a AUTHOR_NAME] [-A AUTHOR_NAME] [-c CLIENT_SECRET_FILE] [-e AUTHOR_EMAIL] [-s] [-p] [-u URL] CLIENT_ID [docker-image:tag|sbom file]
73+
Usage: $SCRIPTNAME [-a AUTHOR_NAME] [-A COMPONENT_AUTHOR] [-c CLIENT_SECRET_FILE] [-e AUTHOR_EMAIL] [-sp] [-u URL] CLIENT_ID [docker-image:tag|sbom file|jar URL]
8674
8775
-a AUTHOR name of the author of the SBOM. Default ($AUTHOR_NAME)
8876
-A COMPONENT_AUTHOR name of the author and publisher of the docker image. Default ($COMPONENT_AUTHOR_NAME)
@@ -97,6 +85,7 @@ Examples:
9785
9886
$0 29b48af4-45ca-465b-b136-206674f8aa9b ubuntu:21.10
9987
$0 -s 29b48af4-45ca-465b-b136-206674f8aa9b ./my-sbom.xml
88+
$0 -s 29b48af4-45ca-465b-b136-206674f8aa9b https://repo1.maven.org/maven2/org/assertj/assertj-core/1.0.0/assertj-core-1.0.0.jar
10089
10190
EOF
10291

@@ -126,15 +115,30 @@ while getopts "a:A:c:e:hpsu:" o; do
126115
done
127116
shift $((OPTIND-1))
128117

129-
[ $# -lt 1 ] && usage
118+
[ $# -lt 1 ] && echo "No client id specified" && usage
130119
CLIENT_ID=$1
131120
shift 1
132-
[ $# -lt 1 ] && usage
133-
DOCKER_IMAGE=$1
121+
[ $# -lt 1 ] && echo "No source specified" && usage
122+
SOURCE=$1
134123
shift 1
135124

136-
[ $# -ge 1 ] && usage
125+
[ $# -ge 1 ] && echo "Spurious positional arguments specified" && usage
137126

127+
if [ "${COMPONENT_AUTHOR_NAME}" = "${DEFAULT_AUTHOR_NAME}" ]
128+
then
129+
COMPONENT_AUTHOR_NAME="${AUTHOR_NAME}"
130+
fi
131+
132+
EXT=$(echo "${SOURCE}" | rev | cut -d '.' -f1 | rev | tr '[:upper:]' '[:lower:]')
133+
if [ "$EXT" = "jar" ]
134+
then
135+
JARFILE=true
136+
JARTYPE=$(echo "${SOURCE}" | cut -d':' -f1)
137+
if [ "${JARTYPE}" != "https" ]
138+
then
139+
echo "Jar file must be specified with https URL" && usage
140+
fi
141+
fi
138142
# ----------------------------------------------------------------------------
139143
# Setup exit handling and temporary directory
140144
# ----------------------------------------------------------------------------
@@ -148,7 +152,7 @@ function finalise {
148152
}
149153
trap finalise EXIT INT TERM
150154

151-
OUTFILE=$(echo "${DOCKER_IMAGE}" | tr '/:' '-').${FORMAT}.sbom
155+
OUTFILE=$(echo "${SOURCE}" | tr '/:' '-').${FORMAT}.sbom
152156

153157
# ----------------------------------------------------------------------------
154158
# Extract client secrets
@@ -160,59 +164,89 @@ then
160164
fi
161165
SECRET=$(cat "${CLIENTSECRET_FILE}")
162166

167+
163168
# ----------------------------------------------------------------------------
164-
# Extract SBOM
169+
# Deal with jar files - the argument should be of form
170+
# https://repo1.maven.org/maven2/org/assertj/assertj-core/1.0.0/assertj-core-1.0.0.jar
165171
# ----------------------------------------------------------------------------
166-
if [ "${SBOM}" = "false" ]
172+
if [ "${JARFILE}" = "true" ]
167173
then
168-
log "Scrape ${FORMAT} SBOM from ${DOCKER_IMAGE} to ${OUTFILE} ..."
174+
WORKDIR="${TEMPDIR}/jarfile"
175+
mkdir "${WORKDIR}"
176+
SUPPLIER_NAME=$(echo "${SOURCE}" | cut -d'/' -f4)
177+
SUPPLIER_URL=$(echo "${SOURCE}" | cut -d'/' -f1-4)
178+
(cd "${WORKDIR}" && curl -sSO "${SOURCE}")
179+
pushd "${WORKDIR}" > /dev/null
180+
INPUT=$(ls)
181+
OUTFILE=${INPUT}.${FORMAT}.sbom
169182
OUTPUT="${TEMPDIR}/${OUTFILE}"
170-
syft -q packages -o "${FORMAT}" "${DOCKER_IMAGE}"> "${OUTPUT}"
183+
syft -q packages --scope all-layers -o "${FORMAT}" "file:${INPUT}" > "${OUTPUT}"
184+
popd > /dev/null
185+
186+
COMPONENT_NAME=$(xq -r .bom.metadata.component.name "$OUTPUT")
187+
COMPONENT_VERSION=$(xq -r .bom.components.component.version "${OUTPUT}")
188+
ORIG_COMPONENT_NAME="${COMPONENT_NAME}"
189+
ORIG_COMPONENT_VERSION="${COMPONENT_VERSION}"
190+
COMPONENT_HASH_ALG=
191+
COMPONENT_HASH_CONTENT=
171192
else
172-
OUTPUT="${DOCKER_IMAGE}"
173-
fi
174-
175193
# ----------------------------------------------------------------------------
176-
# Update SBOM including NTIA minimum elements
194+
# Deal with docker images - assume that raw SBOM files originally came from a
195+
# docker image
177196
# ----------------------------------------------------------------------------
178-
ORIG_COMPONENT_NAME=$(xq -r .bom.metadata.component.name "$OUTPUT")
179-
ORIG_COMPONENT_VERSION=$(xq -r .bom.metadata.component.version "$OUTPUT")
180-
COMPONENT_NAME=${ORIG_COMPONENT_NAME%%:*}
181-
COMPONENT_VERSION=${ORIG_COMPONENT_NAME##*:}
182-
HASH_ALG="${ORIG_COMPONENT_VERSION%%:*}"
183-
case ${HASH_ALG^^} in
184-
SHA256) COMPONENT_HASH_ALG="SHA-256"
185-
;;
186-
*) echo >&2 "Unknonwn hash algorithm $HASH_ALG"
187-
esac
188-
COMPONENT_HASH_CONTENT="${ORIG_COMPONENT_VERSION##*:}"
189-
190-
echo "metadata:"
191-
echo " tools:"
192-
echo " tool:"
193-
echo " vendor: $TOOL_VENDOR"
194-
echo " name: $TOOL_NAME"
195-
echo " version: $TOOL_VERSION"
196-
echo " hashes:"
197-
echo " hash:"
198-
echo " alg: $TOOL_HASH_ALG"
199-
echo " content: $TOOL_HASH_CONTENT"
200-
echo " authors:"
201-
echo " author:"
202-
echo " name: $AUTHOR_NAME"
203-
echo " email: $AUTHOR_EMAIL"
204-
echo " component:"
205-
echo " supplier:"
206-
echo " name: $SUPPLIER_NAME"
207-
echo " url: $SUPPLIER_URL"
208-
echo " author: $COMPONENT_AUTHOR_NAME"
209-
echo " publisher: $COMPONENT_AUTHOR_NAME"
210-
echo " name: $ORIG_COMPONENT_NAME -> $COMPONENT_NAME"
211-
echo " version: $ORIG_COMPONENT_VERSION -> $COMPONENT_VERSION"
212-
echo " hashes:"
213-
echo " hash:"
214-
echo " alg: $COMPONENT_HASH_ALG"
215-
echo " content: $COMPONENT_HASH_CONTENT"
197+
SUPPLIER_NAME=dockerhub
198+
SUPPLIER_URL=https://hub.docker.com
199+
if [ "${SBOM}" = "false" ]
200+
then
201+
log "Scrape ${FORMAT} SBOM from ${SOURCE} to ${OUTFILE} ..."
202+
OUTPUT="${TEMPDIR}/${OUTFILE}"
203+
syft -q packages --scope all-layers -o "${FORMAT}" "${SOURCE}"> "${OUTPUT}"
204+
else
205+
OUTPUT="${SOURCE}"
206+
fi
207+
208+
ORIG_COMPONENT_NAME=$(xq -r .bom.metadata.component.name "$OUTPUT")
209+
ORIG_COMPONENT_VERSION=$(xq -r .bom.metadata.component.version "$OUTPUT")
210+
COMPONENT_NAME=${ORIG_COMPONENT_NAME%%:*}
211+
COMPONENT_VERSION=${ORIG_COMPONENT_NAME##*:}
212+
HASH_ALG="${ORIG_COMPONENT_VERSION%%:*}"
213+
case ${HASH_ALG^^} in
214+
SHA256) COMPONENT_HASH_ALG="SHA-256"
215+
;;
216+
*) echo >&2 "Unknown hash algorithm $HASH_ALG"
217+
;;
218+
esac
219+
COMPONENT_HASH_CONTENT="${ORIG_COMPONENT_VERSION##*:}"
220+
fi
221+
222+
cat >&1 <<EOF
223+
metadata:
224+
tools:
225+
tool:
226+
vendor: $TOOL_VENDOR
227+
name: $TOOL_NAME
228+
version: $TOOL_VERSION
229+
hashes:
230+
hash:
231+
alg: $TOOL_HASH_ALG
232+
content: $TOOL_HASH_CONTENT
233+
authors:
234+
author:
235+
name: $AUTHOR_NAME
236+
email: $AUTHOR_EMAIL
237+
component:
238+
supplier:
239+
name: $SUPPLIER_NAME
240+
url: $SUPPLIER_URL
241+
author: $COMPONENT_AUTHOR_NAME
242+
publisher: $COMPONENT_AUTHOR_NAME
243+
name: $ORIG_COMPONENT_NAME -> $COMPONENT_NAME
244+
version: $ORIG_COMPONENT_VERSION -> $COMPONENT_VERSION
245+
hashes:
246+
hash:
247+
alg: $COMPONENT_HASH_ALG
248+
content: $COMPONENT_HASH_CONTENT
249+
EOF
216250

217251
[ -z "$TOOL_VENDOR" ] && echo >&2 "Unable to determine SBOM tool vendor" && exit 1
218252
[ -z "$TOOL_NAME" ] && echo >&2 "Unable to determine SBOM tool name" && exit 1
@@ -223,10 +257,12 @@ echo " content: $COMPONENT_HASH_CONTENT"
223257
[ -z "$SUPPLIER_URL" ] && echo >&2 "Unable to determine component supplier url" && exit 1
224258
[ -z "$COMPONENT_AUTHOR_NAME" ] && echo >&2 "Unable to determine component author name" && exit 1
225259
[ -z "$COMPONENT_NAME" ] && echo >&2 "Unable to determine component name" && exit 1
226-
[ -z "$COMPONENT_VERSION" ] && echo >&2 "Unable to determine component version" && exit 1
227-
[ -z "$COMPONENT_HASH_ALG" ] && echo >&2 "Unable to determine component hash algorithm" && exit 1
228-
[ -z "$COMPONENT_HASH_CONTENT" ] && echo >&2 "Unable to determine component hash content" && exit 1
229260

261+
if [ -z "$COMPONENT_VERSION" ]
262+
then
263+
[ -z "$COMPONENT_HASH_ALG" ] && echo >&2 "Unable to determine component version or hash algorithm" && exit 1
264+
[ -z "$COMPONENT_HASH_CONTENT" ] && echo >&2 "Unable to determine component hash content" && exit 1
265+
fi
230266
PATCHED_OUTPUT="${OUTPUT}.patched"
231267

232268
python3 <(cat <<END
@@ -294,14 +330,18 @@ author.text = '$COMPONENT_AUTHOR_NAME'
294330
295331
# Update component name and version
296332
component.find('name', ns).text = '$COMPONENT_NAME'
297-
component.find('version', ns).text = '$COMPONENT_VERSION'
333+
component_version = '$COMPONENT_VERSION'
334+
if component_version:
335+
component.find('version', ns).text = component_version
298336
299337
# Update component hash
300-
hashes = component.find('hashes', ns)
301-
if not hashes:
302-
hashes = ET.SubElement(component, 'hashes')
303-
hash = ET.SubElement(hashes, 'hash', alg='${COMPONENT_HASH_ALG}')
304-
hash.text = '$COMPONENT_HASH_CONTENT'
338+
component_hash_alg = '${COMPONENT_HASH_ALG}'
339+
if component_hash_alg:
340+
hashes = component.find('hashes', ns)
341+
if not hashes:
342+
hashes = ET.SubElement(component, 'hashes')
343+
hash = ET.SubElement(hashes, 'hash', alg=component_hash_alg)
344+
hash.text = '$COMPONENT_HASH_CONTENT'
305345
306346
# Add component supplier
307347
supplier = component.find('supplier', ns)
@@ -328,7 +368,7 @@ END
328368
# Check that the patched SBOM is valid against the cyclonedx schema
329369
# ----------------------------------------------------------------------------
330370
[ -f "$SCRIPTDIR"/spdx.xsd ] || curl -fsS -o "$SCRIPTDIR"/spdx.xsd https://cyclonedx.org/schema/spdx
331-
[ -f "$SCRIPTDIR"/cyclonedx.xsd ] || curl -fsS -o "$SCRIPTDIR"/cyclonedx.xsd https://cyclonedx.org/schema/bom/1.2
371+
[ -f "$SCRIPTDIR"/cyclonedx.xsd ] || curl -fsS -o "$SCRIPTDIR"/cyclonedx.xsd https://cyclonedx.org/schema/bom/1.3
332372

333373
# xmllint complains about a double import of the spdx schema, but we have to import via
334374
# the wrapper to set the schema location to a local file, as xmllint fails to download

0 commit comments

Comments
 (0)