Skip to content

feature: add solana explorer, readiness probe, checks for proper start #725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 4 additions & 43 deletions starship/charts/devnet/scripts/solana/bootstrap-validator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,14 @@ fi

args+=(
--ledger "$ledger_dir"
--bind-address 0.0.0.0
--rpc-bind-address 0.0.0.0
--rpc-port 8899
--snapshot-interval-slots 200
--no-incremental-snapshots
--identity "$identity"
--vote-account "$vote_account"
--rpc-faucet-address 127.0.0.1:9900
--rpc-faucet-address 0.0.0.0:9900
--no-poh-speed-test
--no-os-network-limits-test
--no-wait-for-vote-to-start-leader
Expand All @@ -165,45 +167,4 @@ args+=(
default_arg --gossip-port 8001
default_arg --log -


# PID of the currently running validator process; empty when none is tracked.
pid=

# Terminate the tracked validator process (if any) and reap it.
# Globals: pid (read, then cleared before the process is signalled)
# Returns: 0 always; failures of kill/wait are deliberately ignored.
kill_node() {
  # Keep this function silent: nothing may be echoed from here, so that $pid
  # is really killed even when stdout/stderr are redirected.
  set +ex
  if [[ -z $pid ]]; then
    return 0
  fi

  local victim=$pid
  pid=
  kill "$victim" || true
  wait "$victim" || true
}

# Signal/error handler: stop the tracked validator via kill_node, then end the
# script. Registered with `trap` for INT, TERM and ERR.
# NOTE(review): `exit` is called without an explicit status, so the script's
# exit code is whatever bash derives for a bare `exit` in this context —
# confirm this is the intended code for callers that inspect it.
kill_node_and_exit() {
kill_node
exit
}

# Make sure the validator child is stopped whenever this script is interrupted,
# terminated, or hits a command failure that fires the ERR trap.
trap 'kill_node_and_exit' INT TERM ERR

# Supervision loop: start the validator in the background, then either wait for
# it once (no_restart) or keep restarting it whenever it disappears.
while true; do
# Log the full command line before launching it.
echo "$program ${args[*]}"
$program "${args[@]}" &
# Remember the child's PID so kill_node and the liveness poll can find it.
pid=$!
echo "pid: $pid"

# With no_restart set to a non-zero value, block until the validator exits and
# propagate its exit status instead of restarting.
if ((no_restart)); then
wait "$pid"
exit $?
fi

# Poll once per second; `kill -0` only tests whether the process still exists.
while true; do
if [[ -z $pid ]] || ! kill -0 "$pid"; then
echo "############## validator exited, restarting ##############"
break
fi
sleep 1
done

# Reap the exited process and clear $pid before the next iteration.
kill_node
done
# NOTE(review): in this diff rendering the line below appears to be the new,
# direct foreground invocation that replaces the restart loop above — confirm
# against the actual file, since as written it would never be reached after an
# unconditional `while true` loop.
$program "${args[@]}"
96 changes: 96 additions & 0 deletions starship/charts/devnet/scripts/solana/chain-rpc-ready.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/bash
# chain-rpc-ready.sh - Check if a Solana RPC service is ready
# Usage: chain-rpc-ready.sh [RPC_URL]
#
# Runs four JSON-RPC calls against RPC_URL (default http://localhost:8899):
#   getHealth       - result must be "ok"
#   getSlot         - result must be a non-zero integer
#   getEpochInfo    - must not return a JSON-RPC error
#   getClusterNodes - result must list at least one node
#
# Outputs: diagnostics on stderr; a one-line summary on stdout on success.
# Exit status: 0 when every check passes, 1 otherwise.
# Requires: curl, jq.

set -euo pipefail

RPC_URL=${1:-"http://localhost:8899"}
readonly RPC_URL

# Per-request limits in seconds. --max-time bounds the whole transfer so that a
# node which accepts the connection but never answers cannot stall the check;
# four requests at 3s each stay below a 15s probe timeout.
readonly CONNECT_TIMEOUT=2
readonly MAX_TIME=3

# Print a diagnostic on stderr and exit with status 1.
fail() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Perform one JSON-RPC call and print the response body on stdout.
# Arguments:
#   $1 - JSON-RPC method name
#   $2 - message to report when the request itself fails
#   $3 - message prefix to report when the response carries an "error" member
# Returns: exits 1 (after printing to stderr) on transport failure, on a body
#   that is not a JSON object, or on a JSON-RPC error.
# Note: every failure is handled explicitly; `set -e` is not relied upon
#   because it is inactive inside `var=$(rpc_query ...) || exit 1`.
#######################################
rpc_query() {
  local method=$1 unreachable_msg=$2 error_prefix=$3
  local payload response

  payload=$(printf '{"jsonrpc":"2.0","id":1,"method":"%s"}' "$method")

  # Test curl's status directly: with `set -e`, checking `$?` on a later line
  # would never run because the failed assignment aborts the script first.
  if ! response=$(curl -s \
    --connect-timeout "$CONNECT_TIMEOUT" --max-time "$MAX_TIME" \
    -X POST -H "Content-Type: application/json" \
    -d "$payload" \
    "$RPC_URL"); then
    fail "$unreachable_msg"
  fi

  # An empty or non-JSON body (proxy error page, truncated reply) must be
  # reported as a failure, not mistaken for "no error present".
  if ! printf '%s' "$response" | jq -e 'type == "object"' >/dev/null 2>&1; then
    fail "$RPC_URL returned an invalid response for $method"
  fi

  if printf '%s' "$response" | jq -e '.error' >/dev/null 2>&1; then
    fail "$error_prefix: $(printf '%s' "$response" \
      | jq -r '.error.message? // "Unknown error"')"
  fi

  printf '%s\n' "$response"
}

printf '%s\n' "Checking if Solana RPC at $RPC_URL is ready..." >&2

# Health: the node must answer and report "ok".
json=$(rpc_query getHealth \
  "$RPC_URL is not reachable" \
  "$RPC_URL returned an error") || exit 1

health_status=$(printf '%s' "$json" | jq -r '.result // "unknown"') \
  || fail "$RPC_URL is not reachable"
if [[ "$health_status" != "ok" ]]; then
  fail "$RPC_URL is not healthy: status is $health_status"
fi

# Slot: the node must be processing blocks (not stuck at 0).
slot_info=$(rpc_query getSlot \
  "$RPC_URL slot info not available" \
  "$RPC_URL slot info error") || exit 1

current_slot=$(printf '%s' "$slot_info" | jq -r '.result // 0') \
  || fail "$RPC_URL slot info not available"
# Guard the numeric comparison below against a non-integer result.
if ! [[ "$current_slot" =~ ^[0-9]+$ ]]; then
  fail "$RPC_URL slot info not available"
fi
if [ "$current_slot" -eq 0 ]; then
  fail "$RPC_URL is not ready: slot is 0 (node may not be synced)"
fi

# Epoch info: only required to succeed without a JSON-RPC error.
rpc_query getEpochInfo \
  "$RPC_URL epoch info not available" \
  "$RPC_URL epoch info error" >/dev/null

# Cluster nodes: the node must see at least one cluster member.
cluster_info=$(rpc_query getClusterNodes \
  "$RPC_URL cluster info not available" \
  "$RPC_URL cluster info error") || exit 1

node_count=$(printf '%s' "$cluster_info" | jq -r '(.result // []) | length') \
  || fail "$RPC_URL cluster info not available"
if ! [[ "$node_count" =~ ^[0-9]+$ ]] || [ "$node_count" -eq 0 ]; then
  fail "$RPC_URL is not ready: no nodes in cluster"
fi

printf '%s\n' "Solana RPC at $RPC_URL is ready and healthy" >&2
printf '%s\n' "Health: $health_status, Slot: $current_slot, Cluster Nodes: $node_count"
exit 0
7 changes: 6 additions & 1 deletion starship/charts/devnet/scripts/solana/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@ solana_program() {
printf "solana"
else
if [[ $program == "validator" || $program == "ledger-tool" || $program == "watchtower" || $program == "install" ]]; then
printf "agave-%s" "$program"
# Check if agave- prefixed binary exists
if command -v "agave-$program" >/dev/null 2>&1; then
printf "agave-%s" "$program"
else
printf "solana-%s" "$program"
fi
else
printf "solana-%s" "$program"
fi
Expand Down
41 changes: 41 additions & 0 deletions starship/charts/devnet/templates/chains/solana/genesis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
{{- if eq $chain.name "solana" }}
{{ $defaultFile := $.Files.Get "defaults.yaml" | fromYaml }}
{{ $chain := include "devnet.fullchain" (dict "name" $chain.id "file" $defaultFile "context" $) | fromJson }}
{{ $dataPublicRPC := dict "chain" $chain.id "port" "'8899'" }}
---
apiVersion: apps/v1
kind: StatefulSet
Expand Down Expand Up @@ -37,6 +38,8 @@ spec:
env:
{{- include "devnet.defaultEvnVars" $chain | indent 12 }}
{{- include "devnet.evnVars" $chain | indent 12 }}
{{- include "devnet.timeoutVars" $.Values | indent 12 }}
{{- include "devnet.genesisVars" $dataPublicRPC | indent 12 }}
- name: KEYS_CONFIG
value: /configs/keys.json
- name: FAUCET_ENABLED
Expand All @@ -59,6 +62,9 @@ spec:

echo "Running setup genesis script..."
bash -e {{ $chain.home }}/scripts/setup.sh

echo "Set config"
solana config set --url localhost
resources: {{- include "devnet.node.resources" ( dict "node" $chain "context" $ ) | trim | nindent 12 }}
volumeMounts:
- mountPath: {{ $chain.home }}
Expand All @@ -72,6 +78,8 @@ spec:
env:
{{- include "devnet.defaultEvnVars" $chain | indent 12 }}
{{- include "devnet.evnVars" $chain | indent 12 }}
{{- include "devnet.timeoutVars" $.Values | indent 12 }}
{{- include "devnet.genesisVars" $dataPublicRPC | indent 12 }}
- name: FAUCET_ENABLED
value: "{{ $chain.faucet.enabled }}"
- name: SOLANA_CONFIG_DIR
Expand All @@ -92,6 +100,39 @@ spec:
name: node
- mountPath: /scripts
name: scripts
startupProbe:
exec:
command:
- bash
- -e
- /scripts/chain-rpc-ready.sh
- http://0.0.0.0:8899
initialDelaySeconds: 10
periodSeconds: 30
timeoutSeconds: 15
failureThreshold: 10
readinessProbe:
exec:
command:
- bash
- -e
- /scripts/chain-rpc-ready.sh
- http://0.0.0.0:8899
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 15
failureThreshold: 3
livenessProbe:
exec:
command:
- bash
- -e
- /scripts/chain-rpc-ready.sh
- http://0.0.0.0:8899
initialDelaySeconds: 60
periodSeconds: 30
timeoutSeconds: 15
failureThreshold: 5
volumes:
- name: node
emptyDir: { }
Expand Down
15 changes: 15 additions & 0 deletions starship/scripts/port-forward.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ CHAIN_GRPCWEB_PORT=9091
CHAIN_LCD_PORT=1317
CHAIN_EXPOSER_PORT=8081
CHAIN_FAUCET_PORT=8000
SOLANA_RPC_PORT=8899
SOLANA_WS_PORT=8900
SOLANA_FAUCET_PORT=9900
ETHEREUM_REST_PORT=8545
ETHEREUM_RPC_PORT=8551
RELAYER_REST_PORT=3000
Expand Down Expand Up @@ -73,6 +76,18 @@ if [[ $num_chains -gt -1 ]]; then
color yellow " Forwarding RPC: http://localhost:$localrpc"
kubectl port-forward pods/$chain_name-$chain-0 $localrest:$ETHEREUM_REST_PORT > /dev/null 2>&1 &
kubectl port-forward pods/$chain_name-$chain-0 $localrpc:$ETHEREUM_RPC_PORT > /dev/null 2>&1 &
elif [[ "$chain_name" == *"solana"* ]]; then
localrpc=$SOLANA_RPC_PORT
localws=$SOLANA_WS_PORT
localfaucet=$SOLANA_FAUCET_PORT
color yellow "Solana chain detected: $chain"
color yellow " Forwarding RPC: http://localhost:$localrpc"
color yellow " Forwarding WS: http://localhost:$localws"
color yellow " Forwarding Faucet: http://localhost:$localfaucet"
kubectl port-forward pods/$chain_name-$chain-0 $localrpc:$SOLANA_RPC_PORT > /dev/null 2>&1 &
kubectl port-forward pods/$chain_name-$chain-0 $localws:$SOLANA_WS_PORT > /dev/null 2>&1 &
kubectl port-forward pods/$chain_name-$chain-0 $localfaucet:$SOLANA_FAUCET_PORT > /dev/null 2>&1 &
sleep 1
else
localrpc=$(yq -r ".chains[$i].ports.rpc" ${CONFIGFILE} )
localgrpc=$(yq -r ".chains[$i].ports.grpc" ${CONFIGFILE} )
Expand Down
8 changes: 8 additions & 0 deletions starship/tests/e2e/configs/solana.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,11 @@ chains:
resources:
cpu: 2000m
memory: 2048Mi

registry:
enabled: true
ports:
rest: 8081
resources:
cpu: "0.1"
memory: "100M"