diff --git a/Dockerfile b/Dockerfile index a69e46d36..7c7751dfb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,7 +75,7 @@ ENV SLOT_NAME_SUFFIX="${SLOT_NAME_SUFFIX}" \ ERL_AFLAGS="-proto_dist inet6_tcp" RUN apt-get update -y && \ - apt-get install -y libstdc++6 openssl libncurses5 locales iptables sudo tini curl awscli jq && \ + apt-get install -y libstdc++6 openssl libncurses5 locales iptables sudo tini curl && \ apt-get clean && rm -f /var/lib/apt/lists/*_* # Set the locale diff --git a/deploy/fly/prod.toml b/deploy/fly/prod.toml deleted file mode 100644 index 8a3a0fd46..000000000 --- a/deploy/fly/prod.toml +++ /dev/null @@ -1,54 +0,0 @@ -# fly.toml app configuration file generated for realtime-prod on 2023-08-08T09:07:09-07:00 -# -# See https://fly.io/docs/reference/configuration/ for information about how to use this file. -# - -app = "realtime-prod" -primary_region = "sea" -kill_signal = "SIGTERM" -kill_timeout = "5s" - -[experimental] - auto_rollback = true - -[deploy] - release_command = "/app/bin/migrate" - strategy = "rolling" - -[env] - DNS_NODES = "realtime-prod.internal" - ERL_CRASH_DUMP = "/data/erl_crash.dump" - ERL_CRASH_DUMP_SECONDS = "30" - - -[[services]] - protocol = "tcp" - internal_port = 4000 - processes = ["app"] - - [[services.ports]] - port = 80 - handlers = ["http"] - force_https = true - - [[services.ports]] - port = 443 - handlers = ["tls", "http"] - [services.concurrency] - type = "connections" - hard_limit = 100000 - soft_limit = 100000 - - [[services.tcp_checks]] - interval = "15s" - timeout = "2s" - grace_period = "30s" - - [[services.http_checks]] - interval = "10s" - timeout = "2s" - grace_period = "5s" - method = "get" - path = "/" - protocol = "http" - tls_skip_verify = false diff --git a/deploy/fly/qa.toml b/deploy/fly/qa.toml deleted file mode 100644 index 1fc957e8a..000000000 --- a/deploy/fly/qa.toml +++ /dev/null @@ -1,53 +0,0 @@ -app = "realtime-qa" -kill_signal = "SIGTERM" -kill_timeout = 5 -processes = [] - -[deploy] - release_command = "/app/bin/migrate" - strategy = "rolling" - -[env] - DNS_NODES = "realtime-qa.internal" - ERL_CRASH_DUMP = "/data/erl_crash.dump" - ERL_CRASH_DUMP_SECONDS = 30 - -[experimental] - allowed_public_ports = [] - auto_rollback = true - -[[services]] - internal_port = 4000 - processes = ["app"] - protocol = "tcp" - script_checks = [] - [services.concurrency] - hard_limit = 100000 - soft_limit = 100000 - type = "connections" - - [[services.ports]] - force_https = true - handlers = ["http"] - port = 80 - - [[services.ports]] - handlers = ["tls", "http"] - port = 443 - - [[services.tcp_checks]] - grace_period = "30s" - interval = "15s" - restart_limit = 6 - timeout = "2s" - - [[services.http_checks]] - interval = 10000 - grace_period = "5s" - method = "get" - path = "/" - protocol = "http" - restart_limit = 0 - timeout = 2000 - tls_skip_verify = false - [services.http_checks.headers] diff --git a/deploy/fly/staging.toml b/deploy/fly/staging.toml deleted file mode 100644 index 7bcad8a95..000000000 --- a/deploy/fly/staging.toml +++ /dev/null @@ -1,59 +0,0 @@ -# fly.toml app configuration file generated for realtime-staging on 2023-06-27T07:39:20-07:00 -# -# See https://fly.io/docs/reference/configuration/ for information about how to use this file. -# - -app = "realtime-staging" -primary_region = "lhr" -kill_signal = "SIGTERM" -kill_timeout = "5s" - -[experimental] - auto_rollback = true - -[deploy] - release_command = "/app/bin/migrate" - strategy = "rolling" - -[env] - DNS_NODES = "realtime-staging.internal" - ERL_CRASH_DUMP = "/data/erl_crash.dump" - ERL_CRASH_DUMP_SECONDS = "30" - -[[mounts]] - source = "data_vol_machines" - destination = "/data" - processes = ["app"] - -[[services]] - protocol = "tcp" - internal_port = 4000 - processes = ["app"] - - [[services.ports]] - port = 80 - handlers = ["http"] - force_https = true - - [[services.ports]] - port = 443 - handlers = ["tls", "http"] - [services.concurrency] - type = "connections" - hard_limit = 16384 - soft_limit = 16384 - - [[services.tcp_checks]] - interval = "15s" - timeout = "2s" - grace_period = "30s" - restart_limit = 6 - - [[services.http_checks]] - interval = "10s" - timeout = "2s" - grace_period = "5s" - restart_limit = 0 - method = "get" - path = "/" - protocol = "http" diff --git a/run.sh b/run.sh index ae4d48e33..f10e5e663 100755 --- a/run.sh +++ b/run.sh @@ -8,61 +8,149 @@ if [ ! -z "${RLIMIT_NOFILE:-}" ]; then ulimit -Sn "$RLIMIT_NOFILE" fi -export ERL_CRASH_DUMP=/tmp/erl_crash.dump - -upload_crash_dump_to_s3() { - EXIT_CODE=${?:-0} - bucket=$ERL_CRASH_DUMP_S3_BUCKET - s3Host=$ERL_CRASH_DUMP_S3_HOST - s3Port=$ERL_CRASH_DUMP_S3_PORT - - if [ "${AWS_CONTAINER_CREDENTIALS_RELATIVE_URI-}" ]; then - response=$(curl -s http://169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI) - s3Key=$(echo "$response" | grep -o '"AccessKeyId": *"[^"]*"' | grep -o '"[^"]*"$' | tr -d '"') - s3Secret=$(echo "$response" | grep -o '"SecretAccessKey": *"[^"]*"' | grep -o '"[^"]*"$' | tr -d '"') - else - s3Key=$ERL_CRASH_DUMP_S3_KEY - s3Secret=$ERL_CRASH_DUMP_S3_SECRET - fi - - filePath=${ERL_CRASH_DUMP_FOLDER:-tmp}/$(date +%s)_${ERL_CRASH_DUMP_FILE_NAME:-erl_crash.dump} - - if [ -f "${ERL_CRASH_DUMP_FOLDER:-tmp}/${ERL_CRASH_DUMP_FILE_NAME:-erl_crash.dump}" ]; then - mv ${ERL_CRASH_DUMP_FOLDER:-tmp}/${ERL_CRASH_DUMP_FILE_NAME:-erl_crash.dump} $filePath - resource="/${bucket}/realtime/crash_dumps${filePath}" - - contentType="application/octet-stream" - dateValue=$(date -R) - stringToSign="PUT\n\n${contentType}\n${dateValue}\n${resource}" +generate_certs() { + : "${AWS_CONTAINER_CREDENTIALS_RELATIVE_URI:?AWS_CONTAINER_CREDENTIALS_RELATIVE_URI is required}" + : "${CLUSTER_SECRET_ID:?CLUSTER_SECRET_ID is required}" + : "${CLUSTER_SECRET_REGION:?CLUSTER_SECRET_REGION is required}" + + local creds + creds=$(curl -fsS "http://169.254.170.2${AWS_CONTAINER_CREDENTIALS_RELATIVE_URI}") + + # Extract a top-level string field from a flat JSON object. + # Handles backslash-escaped characters inside the value. + json_field() { + local field="$1" + awk -v key="$field" ' + BEGIN { RS="\0" } + { + pat = "\"" key "\"[[:space:]]*:[[:space:]]*\"" + if (match($0, pat)) { + s = substr($0, RSTART + RLENGTH) + out = "" + i = 1 + while (i <= length(s)) { + c = substr(s, i, 1) + if (c == "\\") { out = out substr(s, i, 2); i += 2; continue } + if (c == "\"") { break } + out = out c + i++ + } + gsub(/\\"/, "\"", out) + gsub(/\\\\/, "\\", out) + gsub(/\\n/, "\n", out) + gsub(/\\t/, "\t", out) + gsub(/\\r/, "\r", out) + gsub(/\\\//, "/", out) + print out + } + } + ' + } + + local aws_access_key aws_secret_key aws_session_token + aws_access_key=$(printf '%s' "$creds" | json_field "AccessKeyId") + aws_secret_key=$(printf '%s' "$creds" | json_field "SecretAccessKey") + aws_session_token=$(printf '%s' "$creds" | json_field "Token") + + if [[ -z "$aws_access_key" || -z "$aws_secret_key" || -z "$aws_session_token" ]]; then + echo "Failed to obtain ECS task role credentials" >&2 + return 1 + fi - signature=$(echo -en ${stringToSign} | openssl sha1 -hmac ${s3Secret} -binary | base64) + local service="secretsmanager" + local region="${CLUSTER_SECRET_REGION}" + local host="secretsmanager.${region}.amazonaws.com" + local endpoint="https://${host}/" + local amz_target="secretsmanager.GetSecretValue" + local content_type="application/x-amz-json-1.1" + local amz_date short_date + amz_date=$(date -u +"%Y%m%dT%H%M%SZ") + short_date=$(date -u +"%Y%m%d") + + local payload + payload=$(printf '{"SecretId":"%s"}' "${CLUSTER_SECRET_ID}") + + sha256_hex() { + openssl dgst -sha256 -hex | awk '{print $NF}' + } + + local payload_hash + payload_hash=$(printf '%s' "$payload" | sha256_hex) + + # Canonical headers must be sorted alphabetically by lowercased name. + # Format per AWS SigV4 spec: METHOD\nURI\nQueryString\nHeaders\n\nSignedHeaders\nPayloadHash + local signed_headers="content-type;host;x-amz-date;x-amz-security-token;x-amz-target" + local canonical_request + canonical_request=$(printf '%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n\n%s\n%s' \ + "POST" \ + "/" \ + "" \ + "content-type:${content_type}" \ + "host:${host}" \ + "x-amz-date:${amz_date}" \ + "x-amz-security-token:${aws_session_token}" \ + "x-amz-target:${amz_target}" \ + "${signed_headers}" \ + "${payload_hash}") + + local canonical_request_hash + canonical_request_hash=$(printf '%s' "$canonical_request" | sha256_hex) + + local credential_scope="${short_date}/${region}/${service}/aws4_request" + local string_to_sign + string_to_sign=$(printf 'AWS4-HMAC-SHA256\n%s\n%s\n%s' \ + "$amz_date" "$credential_scope" "$canonical_request_hash") + + # Derive signing key via successive HMAC-SHA256 steps. + # openssl -macopt hexkey: requires hex input, so we hex-encode the initial key with od. + hmac_sha256_hex() { + openssl dgst -sha256 -mac HMAC -macopt "hexkey:$1" | awk '{print $NF}' + } + + local k_secret_hex k_date k_region k_service k_signing signature + k_secret_hex=$(printf 'AWS4%s' "$aws_secret_key" | od -An -tx1 -v | tr -d ' \n') + k_date=$(printf '%s' "$short_date" | hmac_sha256_hex "$k_secret_hex") + k_region=$(printf '%s' "$region" | hmac_sha256_hex "$k_date") + k_service=$(printf '%s' "$service" | hmac_sha256_hex "$k_region") + k_signing=$(printf '%s' "aws4_request" | hmac_sha256_hex "$k_service") + signature=$(printf '%s' "$string_to_sign" | hmac_sha256_hex "$k_signing") + + local authorization="AWS4-HMAC-SHA256 Credential=${aws_access_key}/${credential_scope}, SignedHeaders=${signed_headers}, Signature=${signature}" + + local response + response=$(curl -fsS -X POST "$endpoint" \ + -H "Content-Type: ${content_type}" \ + -H "Host: ${host}" \ + -H "X-Amz-Date: ${amz_date}" \ + -H "X-Amz-Security-Token: ${aws_session_token}" \ + -H "X-Amz-Target: ${amz_target}" \ + -H "Authorization: ${authorization}" \ + --data-binary "$payload") + + local secret_string + secret_string=$(printf '%s' "$response" | json_field "SecretString") + + if [[ -z "$secret_string" ]]; then + echo "SecretString not found in Secrets Manager response" >&2 + return 1 + fi - if [ "${ERL_CRASH_DUMP_S3_SSL:-}" = true ]; then - protocol="https" - else - protocol="http" - fi + printf '%s' "$secret_string" | json_field "key" | base64 -d > ca.key + printf '%s' "$secret_string" | json_field "cert" | base64 -d > ca.cert - curl -v -X PUT -T "${filePath}" \ - -H "Host: ${s3Host}" \ - -H "Date: ${dateValue}" \ - -H "Content-Type: ${contentType}" \ - -H "Authorization: AWS ${s3Key}:${signature}" \ - ${protocol}://${s3Host}:${s3Port}${resource} + if [[ ! -s ca.key || ! -s ca.cert ]]; then + echo "Failed to extract ca.key/ca.cert from secret" >&2 + return 1 fi - exit "$EXIT_CODE" -} - -generate_certs() { - aws secretsmanager get-secret-value --secret-id "${CLUSTER_SECRET_ID}" --region "${CLUSTER_SECRET_REGION}" | jq -r '.SecretString' > cert_secrets - jq -r '.key' cert_secrets | base64 -d > ca.key - jq -r '.cert' cert_secrets | base64 -d > ca.cert - openssl req -new -nodes -out server.csr -keyout server.key -subj "/C=US/ST=Delaware/L=New Castle/O=Supabase Inc/CN=$(hostname -f)" + openssl req -new -nodes -out server.csr -keyout server.key \ + -subj "/C=US/ST=Delaware/L=New Castle/O=Supabase Inc/CN=$(hostname -f)" openssl x509 -req -in server.csr -days 90 -CA ca.cert -CAkey ca.key -out server.cert rm -f ca.key - CWD=`pwd` + + local CWD + CWD=$(pwd) export GEN_RPC_CACERTFILE="$CWD/ca.cert" export GEN_RPC_KEYFILE="$CWD/server.key" export GEN_RPC_CERTFILE="$CWD/server.cert" @@ -83,12 +171,9 @@ generate_certs() { ]} ]. EOF - export ERL_AFLAGS="${ERL_AFLAGS} -proto_dist inet_tls -ssl_dist_optfile ${CWD}/inet_tls.conf" + export ERL_AFLAGS="${ERL_AFLAGS:-} -proto_dist inet_tls -ssl_dist_optfile ${CWD}/inet_tls.conf" } -if [ "${ENABLE_ERL_CRASH_DUMP:-false}" = true ]; then - trap upload_crash_dump_to_s3 INT TERM KILL EXIT -fi if [[ -n "${GENERATE_CLUSTER_CERTS:-}" ]] ; then generate_certs