Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
3a45aef
createdisk: remove trailing spaces
kpouget Oct 1, 2025
9b3c177
systemd/*.service: Ensure that crc-env exists before starting
kpouget Oct 1, 2025
c1b4155
systemd/*.service: don't use EnvironmentFile when not needed
kpouget Oct 1, 2025
c634d53
systemd: Improve the handling of the tap networking
kpouget Oct 1, 2025
22320bf
tools.sh: improve the bash syntax of the `generate_htpasswd_file` fun…
kpouget Oct 1, 2025
060329b
crc-systemd-common.sh: improve the bash syntax, rename into wait_for_…
kpouget Oct 1, 2025
6255e67
crc-cluster-status: improve the bash syntax
kpouget Oct 1, 2025
b3d28fd
crc-pullsecret.sh: syntax and reliability improvements
kpouget Oct 1, 2025
e658a37
crc-routes-controller.service: add a condition on user-mode networking
kpouget Oct 1, 2025
efab23c
crc-routes-controller.sh: minor syntax improvements
kpouget Oct 1, 2025
a35018c
crc-wait-apiserver-up.sh: minor syntax improvements
kpouget Oct 1, 2025
f66ea10
dnsmasq.sh.template: minor syntax improvements
kpouget Oct 1, 2025
7864591
ocp-cluster-ca.sh: syntax and reliability improvements
kpouget Oct 1, 2025
d086d67
ocp-clusterid.sh: minor syntax improvements
kpouget Oct 1, 2025
4cfee5d
ocp-custom-domain.sh: syntax and reliability improvements
kpouget Oct 1, 2025
0298bbf
ocp-growfs.sh: syntax improvements
kpouget Oct 1, 2025
fcae494
ocp-mco-sshkey.sh: syntax improvements
kpouget Oct 1, 2025
df91481
ocp-userpasswords.sh: syntax improvements
kpouget Oct 1, 2025
fd191fe
ocp-userpasswords.service: remove unnecessary sleep
kpouget Oct 2, 2025
975315d
Define the KUBECONFIG in the systemd service
kpouget Oct 2, 2025
af25437
systemd: add a synchronization on ocp-wait-apiservices-available
kpouget Oct 2, 2025
5500127
crc-wait-apiserver-up.sh: try more often
kpouget Oct 2, 2025
4408e0a
systemd: add synchronization on crc-wait-node-ready
kpouget Oct 3, 2025
69ae19a
ocp-growfs: remove
kpouget Oct 6, 2025
eddf3e8
crc-aws-fetch-secrets.sh: new script for mapt to fetch the secrets fr…
kpouget Oct 6, 2025
a271d9c
systemd: let systemd enforce that the mandatory secrets files exist
kpouget Oct 6, 2025
7116ea0
systemd: prevent podman from leaking passwords in the journal
kpouget Oct 6, 2025
28535ef
systemd: log the wait durations
kpouget Oct 6, 2025
5636dbf
createdisk-library: add the ability to upload `unit-name.service.d` c…
kpouget Oct 7, 2025
2855386
ovs-configuration.service.d/mute-console.conf: mute the journal logs
kpouget Oct 7, 2025
fe06a1f
crc-pullsecret.service: retry more often
kpouget Oct 7, 2025
5e27e7b
crc-pullsecret.service: only run after cloud-final.service
kpouget Oct 7, 2025
9c159a4
ocp-userpasswords.service: only run after cloud-final.service
kpouget Oct 7, 2025
9b2e043
crc-custom.target: reformulate the dependencies
kpouget Oct 7, 2025
9f8b565
ocp-mco-sshkey.service: ensure that the pubkey is there before starting
kpouget Oct 7, 2025
31b5633
ocp-mco-sshkey.sh: don't expose the pub key to the journal
kpouget Oct 7, 2025
b1ee39f
ocp-custom-domain.service: don't use AssertPathExists
kpouget Oct 14, 2025
9c56363
systemd: remove the dependency on crc-env-file-exists.service
kpouget Oct 15, 2025
3191110
systemd: update the self-sufficient/user-mode tests to avoid relying …
kpouget Oct 15, 2025
a4d8e22
crc-aws-fetch-secrets: try multiple times to get the secrets from the…
kpouget Oct 15, 2025
664f723
systemd: crc-needs-tap.sh: skip self-sufficient/user-mode networking …
kpouget Oct 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion createdisk-library.sh
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,9 @@ function copy_systemd_units() {
${SSH} core@${VM_IP} -- 'mkdir -p /home/core/systemd-units && mkdir -p /home/core/systemd-scripts'
${SCP} systemd/crc-*.service core@${VM_IP}:/home/core/systemd-units/
${SCP} systemd/crc-*.target core@${VM_IP}:/home/core/systemd-units/
${SCP} -r systemd/*.d core@${VM_IP}:/home/core/systemd-units/
${SCP} systemd/crc-*.sh core@${VM_IP}:/home/core/systemd-scripts/
${SCP} systemd/crc-*.py core@${VM_IP}:/home/core/systemd-scripts/

case "${BUNDLE_TYPE}" in
"snc"|"okd")
Expand All @@ -419,7 +421,7 @@ function copy_systemd_units() {
;;
esac

${SSH} core@${VM_IP} -- 'sudo cp /home/core/systemd-units/* /etc/systemd/system/ && sudo cp /home/core/systemd-scripts/* /usr/local/bin/'
${SSH} core@${VM_IP} -- 'sudo cp -r /home/core/systemd-units/* /etc/systemd/system/ && sudo cp /home/core/systemd-scripts/* /usr/local/bin/'
${SSH} core@${VM_IP} -- 'ls /home/core/systemd-scripts/ | xargs -t -I % sudo chmod +x /usr/local/bin/%'
${SSH} core@${VM_IP} -- 'sudo restorecon -rv /usr/local/bin'

Expand Down
6 changes: 5 additions & 1 deletion createdisk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ wait_for_ssh ${VM_NAME} ${VM_IP}
if [ ${BUNDLE_TYPE} != "microshift" ]; then
# Disable kubelet service
${SSH} core@${VM_IP} -- sudo systemctl disable kubelet

# Stop the kubelet service so it will not reprovision the pods
${SSH} core@${VM_IP} -- sudo systemctl stop kubelet
fi
Expand Down Expand Up @@ -109,11 +109,15 @@ ${SSH} core@${VM_IP} 'sudo bash -x -s' <<EOF
[Unit]
Description=gvisor-tap-vsock Network Traffic Forwarder
After=sys-devices-virtual-net-%i.device
After=crc-check-tap.service

[Service]
Restart=on-failure
Environment="GV_VSOCK_PORT=1024"
EnvironmentFile=-/etc/sysconfig/gv-user-network
# if CRC doesn't need tap, mark the unit as 'skipped'
ExecCondition=/usr/local/bin/crc-needs-tap.sh

ExecStart=/usr/libexec/podman/gvforwarder -preexisting -iface %i -url vsock://2:"\\\${GV_VSOCK_PORT}"/connect

[Install]
Expand Down
1 change: 0 additions & 1 deletion docs/self-sufficient-bundle.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ services to do their work.
| `ocp-cluster-ca.service` | ocp | /opt/crc/custom-ca.crt | none |
| `ocp-clusterid.service` | ocp | none | none |
| `ocp-custom-domain.service` | ocp | none | none |
| `ocp-growfs.service` | ocp | none | none |
| `ocp-userpasswords.service` | ocp | /opt/crc/pass_{kubeadmin, developer} | none |

In addition to the above services, we have `ocp-cluster-ca.path`, `crc-pullsecret.path` and `ocp-userpasswords.path` that monitor the filesystem paths
Expand Down
128 changes: 128 additions & 0 deletions systemd/crc-aws-fetch-secrets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/bin/bash

# Script preamble: strict mode, configuration constants, validation of the
# three positional parameters and the retry settings used by 'retry_compact'.

set -o errexit -o nounset -o pipefail -o errtrace
set -x

# xtrace stays enabled: the secret values are never put on a command line,
# only the names of the parameters to fetch are.

AWS_CLI_IMG=docker.io/amazon/aws-cli

# A fetched secret shorter than this many characters is considered invalid.
MIN_CHAR_COUNT=8

# Files created from now on are only accessible by their owner (secrets).
umask 0077
# Make sure that the directory receiving the secrets exists.
install -d -m 0700 /opt/crc

PULL_SECRETS_KEY="${1:-}"
KUBEADM_PASS_KEY="${2:-}"
DEVELOPER_PASS_KEY="${3:-}"

# All three parameter keys are mandatory.
if ! [[ -n "$PULL_SECRETS_KEY" && -n "$KUBEADM_PASS_KEY" && -n "$DEVELOPER_PASS_KEY" ]]; then
  echo "ERROR: expected to receive 3 parameters: PULL_SECRETS_KEY KUBEADM_PASS_KEY DEVELOPER_PASS_KEY"
  exit 1
fi

# Retry policy: one attempt every DELAY seconds, for TOTAL_PERIOD seconds.
DELAY=5
TOTAL_PERIOD=$(( 3 * 60 ))
ATTEMPTS=$(( TOTAL_PERIOD / DELAY ))
#######################################
# Run a command repeatedly until it succeeds or the attempts are exhausted.
# Globals:   ATTEMPTS (read) - maximum number of attempts
#            DELAY    (read) - pause, in seconds, between two attempts
# Arguments: the command to run, followed by its own arguments
# Outputs:   progress messages on stdout
# Returns:   0 as soon as the command succeeds, 1 if it never succeeded
#######################################
retry_compact() {
  local attempt

  for (( attempt = 1; attempt <= ATTEMPTS; attempt++ )); do
    # Running the command as an 'if' condition prevents 'errexit' from
    # aborting the script when an attempt fails.
    if "$@"; then
      echo "'$*' succeeded after $attempt attempts "
      return 0
    fi
    echo "'$*' still failing after $attempt/$ATTEMPTS attempts ..."

    # Waiting after the very last attempt would only delay the failure.
    if (( attempt < ATTEMPTS )); then
      sleep "$DELAY"
    fi
  done

  # The loop finished: the command never succeeded.
  echo "'$*' didn't succeed after $ATTEMPTS attempt ..."
  return 1
}

# Remove the temporary files this script may have left behind: the cached
# AWS region and the partially-written secret files.
cleanup() {
  local -a leftovers=(
    /tmp/aws-region
    /opt/crc/pull-secret.tmp
    /opt/crc/pass_kubeadmin.tmp
    /opt/crc/pass_developer.tmp
  )

  rm -f "${leftovers[@]}"
  echo "Temp files cleanup complete."
}

# Cleanup happens automatically via trap on error or at script end
# ('cleanup' is registered for both the ERR and the EXIT conditions).
trap cleanup ERR EXIT

# SECONDS is bash's built-in timer: once reset to 0, it reports the number of
# seconds elapsed since the assignment.
SECONDS=0
# Pull the AWS CLI image up front and report how long the download took.
podman pull --quiet "$AWS_CLI_IMG"
echo "Took $SECONDS seconds to pull the $AWS_CLI_IMG"

#######################################
# Check that the AWS instance metadata service (IMDS) answers, and store the
# region of the instance in /tmp/aws-region (one line, newline-terminated).
#
# This function is meant to be retried: every failure is reported with an
# explicit 'return 1', because 'errexit' is suspended when the function is
# invoked from a condition (as 'retry_compact' does).
#
# Arguments: none
# Outputs:   progress on stdout, errors on stderr, region in /tmp/aws-region
# Returns:   0 when the region was fetched and saved, 1 otherwise
#######################################
check_imds_available_and_get_region() {
  local token region
  local -a token_cmd=(
    curl
    --connect-timeout 1
    -X PUT
    "http://169.254.169.254/latest/api/token"
    -H "X-aws-ec2-metadata-token-ttl-seconds: 21600"
    -Ssf
  )

  # First, request a session token from the metadata service.
  if ! token=$("${token_cmd[@]}"); then
    echo "Couldn't fetch the token..." >&2
    return 1
  fi

  # Then, use the token to get the region
  echo "Fetching the AWS region ..."
  if ! region=$(curl -Ssf -H "X-aws-ec2-metadata-token: $token" \
      http://169.254.169.254/latest/meta-data/placement/region); then
    echo "Couldn't fetch the AWS region..." >&2
    return 1
  fi

  # An empty answer must not be reported as a success, or the callers would
  # silently run with an empty AWS region.
  if [[ -z "$region" ]]; then
    echo "The AWS region returned by the IMDS service is empty..." >&2
    return 1
  fi

  # Only write the file once the value is known to be valid.
  printf '%s\n' "$region" > /tmp/aws-region
  echo "AWS region: $region"
}

# Wait for the IMDS service inside a subshell, so that turning the xtrace off
# (and resetting SECONDS) only applies to this section of the script.
(
set +x # disable the xtrace as the token would be leaked
echo "Waiting for the AWS IMDS service to be available ..."
# restart bash's built-in timer to measure the wait duration
SECONDS=0
# if every attempt fails, 'errexit' terminates the subshell with an error
retry_compact check_imds_available_and_get_region
echo "Took $SECONDS for the IMDS service to become available."
)

#######################################
# Fetch one secret from the AWS SSM parameter store and save it to a file.
#
# The value is first written to "<dest>.tmp", validated, and only then moved
# to its final location, so that "<dest>" never holds a partial content.
#
# Globals:   AWS_CLI_IMG (read), MIN_CHAR_COUNT (read)
# Arguments: $1 - short name of the secret (container name and log messages)
#            $2 - name of the SSM parameter to fetch
#            $3 - path of the file receiving the secret
# Outputs:   errors on stderr; reads the AWS region from /tmp/aws-region
# Returns:   0 when the secret was saved, 1 otherwise
#######################################
save_secret() {
  # 'local' keeps these names from clobbering same-named variables of the
  # caller.
  local name=$1
  local key=$2
  local dest=$3
  local char_count

  # --log-driver=none avoids that the journal captures the stdout
  # logs of podman and leaks the passwords in the journal ...
  if ! podman run \
      --name "cloud-init-fetch-$name" \
      --env AWS_REGION="$(< /tmp/aws-region)" \
      --log-driver=none \
      --rm \
      "$AWS_CLI_IMG" \
      ssm get-parameter \
      --name "$key" \
      --with-decryption \
      --query "Parameter.Value" \
      --output text \
      > "${dest}.tmp"
  then
    rm -f "${dest}.tmp"
    echo "ERROR: failed to get the '$name' secret ... (fetched from $key)" >&2
    return 1
  fi

  # Reject values that are obviously too short to be a valid secret.
  char_count=$(wc -c < "${dest}.tmp")
  if (( char_count < MIN_CHAR_COUNT )); then
    echo "ERROR: the content of the '$name' secret is too short ... (fetched from $key)" >&2
    rm -f "${dest}.tmp"
    return 1
  fi

  mv "${dest}.tmp" "${dest}" # atomic creation of the file

  return 0
}

# Fetch the three secrets, each one from the parameter key received on the
# command line, and store them under /opt/crc.
# execution will abort if 'retry_compact' fails.
retry_compact save_secret "pull-secrets" "$PULL_SECRETS_KEY" /opt/crc/pull-secret
retry_compact save_secret "kubeadmin-pass" "$KUBEADM_PASS_KEY" /opt/crc/pass_kubeadmin
retry_compact save_secret "developer-pass" "$DEVELOPER_PASS_KEY" /opt/crc/pass_developer

# reaching this point means that all the secrets have been saved
exit 0
14 changes: 14 additions & 0 deletions systemd/crc-check-tap.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[Unit]
Description=Ensure that tap0 network configuration is disabled when not necessary
Before=NetworkManager.service
Before=gv-user-network@tap0.service
After=local-fs.target
RequiresMountsFor=/etc/NetworkManager/system-connections

[Service]
Type=oneshot
ExecStart=/usr/local/bin/crc-conditionally-disable-tap.sh

[Install]
WantedBy=NetworkManager.service
WantedBy=gv-user-network@tap0.service
4 changes: 3 additions & 1 deletion systemd/crc-cluster-status.service
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@ After=crc-wait-apiserver-up.service crc-pullsecret.service
After=ocp-mco-sshkey.service ocp-cluster-ca.service
After=ocp-custom-domain.service ocp-userpasswords.service
After=ocp-clusterid.service
After=ocp-wait-apiservices-available.service
After=crc-wait-node-ready.service
StartLimitIntervalSec=450
StartLimitBurst=10

[Service]
Type=oneshot
Restart=on-failure
RestartSec=40
EnvironmentFile=-/etc/sysconfig/crc-env
Environment=KUBECONFIG=/opt/kubeconfig
ExecCondition=/usr/local/bin/crc-self-sufficient-env.sh
ExecStart=/usr/local/bin/crc-cluster-status.sh
RemainAfterExit=true
Expand Down
47 changes: 36 additions & 11 deletions systemd/crc-cluster-status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ set -o nounset
set -o errtrace
set -x

export KUBECONFIG=/opt/kubeconfig
MAXIMUM_LOGIN_RETRY=10
RETRY_DELAY=5

if [ ! -f /opt/crc/pass_kubeadmin ]; then
echo "kubeadmin password file not found"
Expand All @@ -15,25 +16,49 @@ fi

rm -rf /tmp/.crc-cluster-ready

SECONDS=0
if ! oc adm wait-for-stable-cluster --minimum-stable-period=1m --timeout=10m; then
exit 1
fi

echo "Cluster took $SECONDS seconds to stabilize."

echo "Logging into OpenShift with kubeadmin user to update $KUBECONFIG"
COUNTER=1
MAXIMUM_LOGIN_RETRY=10
echo "Logging into OpenShift with kubeadmin user to update the KUBECONFIG"

try_login() {
( # use a `(set +x)` subshell to avoid leaking the password
set +x
set +e # don't abort on error in this subshell
oc login --insecure-skip-tls-verify=true \
-u kubeadmin \
-p "$(cat /opt/crc/pass_kubeadmin)" \
https://api.crc.testing:6443 > /dev/null 2>&1
)
local success="$?"
if [[ "$success" == 0 ]]; then
echo "Login succeeded"
else
echo "Login did not complete ..."
fi

# use a `(set +x)` subshell to avoid leaking the password
until (set +x ; oc login --insecure-skip-tls-verify=true -u kubeadmin -p "$(cat /opt/crc/pass_kubeadmin)" https://api.crc.testing:6443 > /dev/null 2>&1); do
if [ "$COUNTER" -ge "$MAXIMUM_LOGIN_RETRY" ]; then
echo "Unable to login to the cluster..., authentication failed."
return "$success"
}
Comment on lines +28 to +45
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Pass --kubeconfig to oc login to ensure correct target file.

Without the --kubeconfig flag, oc login defaults to writing credentials to ~/.kube/config instead of the intended /opt/kubeconfig (set via the KUBECONFIG environment variable by the service unit). While the environment variable affects oc CLI behavior in some contexts, oc login specifically requires the flag to target the correct file.

Apply this diff:

-        oc login --insecure-skip-tls-verify=true \
+        oc login --kubeconfig "$KUBECONFIG" --insecure-skip-tls-verify=true \
            -u kubeadmin \
            -p "$(cat /opt/crc/pass_kubeadmin)" \
            https://api.crc.testing:6443 > /dev/null 2>&1

Additionally, add a guard at the script start to ensure KUBECONFIG is set:

# After line 10, before the password file check
: "${KUBECONFIG:?KUBECONFIG must be set}"
🤖 Prompt for AI Agents
In systemd/crc-cluster-status.sh around lines 28 to 45, the oc login call omits
--kubeconfig so credentials may be written to the default ~/.kube/config instead
of the intended file; update the oc login invocation to pass --kubeconfig
"$KUBECONFIG" and keep the rest of the options and redirection, and also add a
guard after line 10 to ensure KUBECONFIG is set (fail early if unset) by
inserting a parameter expansion check like : "${KUBECONFIG:?KUBECONFIG must be
set}" before the password file validation.


for ((counter=1; counter<=MAXIMUM_LOGIN_RETRY; counter++)); do
echo "Login attempt $counter/$MAXIMUM_LOGIN_RETRY…"
if try_login; then
break
fi
if (( counter == MAXIMUM_LOGIN_RETRY )); then
echo "Unable to login to the cluster after $counter attempts; authentication failed."
exit 1
fi
echo "Logging into OpenShift with updated credentials try $COUNTER, hang on...."
sleep 5
((COUNTER++))
sleep "$RETRY_DELAY"
done

# need to set a marker to let `crc` know the cluster is ready
touch /tmp/.crc-cluster-ready

echo "All done after $SECONDS seconds "

exit 0
17 changes: 17 additions & 0 deletions systemd/crc-conditionally-disable-tap.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash

# Disable the tap0 network configuration when CRC does not need it.
#
# The decision is delegated to crc-needs-tap.sh, which exits with:
#   0  -> the TAP interface is needed
#   77 -> the TAP interface is not needed
# Any other exit code means that the check itself failed. In that case the
# tap0 configuration is left untouched, rather than being removed on the
# basis of an unreliable answer.

set -o pipefail
set -o errexit
set -o nounset
set -o errtrace
set -x

# Exit code used by crc-needs-tap.sh to report "tap0 is not needed".
EXIT_DONT_NEED_TAP=77

# '|| status=$?' captures the exit code without triggering 'errexit'.
needs_tap_status=0
/usr/local/bin/crc-needs-tap.sh || needs_tap_status=$?

case "$needs_tap_status" in
  0)
    # Nothing to do here if CRC needs the TAP interface
    echo "TAP device is required, doing nothing."
    exit 0
    ;;
  "$EXIT_DONT_NEED_TAP")
    echo "TAP device not required, running disable script..."
    exec /usr/local/bin/crc-disable-tap.sh
    ;;
  *)
    echo "ERROR: crc-needs-tap.sh failed with exit code $needs_tap_status, leaving the tap0 configuration untouched." >&2
    exit 1
    ;;
esac
7 changes: 4 additions & 3 deletions systemd/crc-custom.target
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[Unit]
Description=crc custom target
Requires=kubelet-dependencies.target
After=kubelet-dependencies.target
Description=CRC custom target
Requires=crc-wait-apiserver-up.service
Requires=crc-cluster-status.service
After=crc-wait-apiserver-up.service crc-cluster-status.service
14 changes: 14 additions & 0 deletions systemd/crc-disable-tap.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

# Disable the tap0 networking: remove its NetworkManager connection profile
# and stop/disable the traffic forwarder instance bound to tap0.

set -o pipefail
set -o errexit
set -o nounset
set -o errtrace
set -x

echo "Disabling the tap0 network configuration ..."

rm -f /etc/NetworkManager/system-connections/tap0.nmconnection
# Best effort: a failure to disable the unit must not fail this script.
systemctl disable --now gv-user-network@tap0.service || true

exit 0
1 change: 0 additions & 1 deletion systemd/crc-dnsmasq.service
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ StartLimitIntervalSec=30
[Service]
Type=oneshot
Restart=on-failure
EnvironmentFile=-/etc/sysconfig/crc-env
ExecStartPre=/bin/systemctl start ovs-configuration.service
ExecCondition=/usr/local/bin/crc-self-sufficient-env.sh
ExecStart=/usr/local/bin/crc-dnsmasq.sh
Expand Down
45 changes: 45 additions & 0 deletions systemd/crc-needs-tap.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash

# Tell whether this machine needs the tap0 network interface.
#
# Exit codes:
#   0  -> tap0 is needed
#   77 -> tap0 is not needed

set -o errexit -o nounset -o pipefail -o errtrace
set -x

source /etc/sysconfig/crc-env || echo "WARNING: crc-env not found"

EXIT_NEED_TAP=0
EXIT_DONT_NEED_TAP=77
EXIT_ERROR=1 # not used by any code path of this script

# Print the reason given as $1 and report that tap0 is not needed.
tap_not_needed() {
  echo "$1"
  exit "$EXIT_DONT_NEED_TAP"
}

# Print the reason given as $1 and report that tap0 is needed.
tap_needed() {
  echo "$1"
  exit "$EXIT_NEED_TAP"
}

if /usr/local/bin/crc-self-sufficient-env.sh; then
  tap_not_needed "Running a self-sufficient bundle. Don't need tap0"
fi

if /usr/local/bin/crc-user-mode-networking.sh system; then
  tap_not_needed "Running with CRC and system-mode networking. Don't need tap0. (Fairly rare case.)"
fi

# From here on:
#  - running with CRC (not a self-sufficient bundle)
#  - running with user-mode networking
# --> vfkit doesn't need tap0
# --> other platforms do need it

# a failure of the detection tool is tolerated (the value is then empty)
virt="$(systemd-detect-virt || true)"

if [[ "$virt" == "apple" ]]; then
  tap_not_needed "Running with vfkit ($virt) virtualization. Don't need tap0."
elif [[ "$virt" == "none" ]]; then
  tap_not_needed "Bare metal detected. Don't need tap0."
else
  tap_needed "Running with '$virt' virtualization. Need tap0."
fi
13 changes: 0 additions & 13 deletions systemd/crc-no-tap.service

This file was deleted.

Loading