diff --git a/deploy/dockerephemeral/Dockerfile.haproxy b/deploy/dockerephemeral/Dockerfile.haproxy
new file mode 100644
index 0000000000..33e509090c
--- /dev/null
+++ b/deploy/dockerephemeral/Dockerfile.haproxy
@@ -0,0 +1,16 @@
+FROM haproxy:2.8
+
+# Switch to root to install packages
+USER root
+
+# Install socat, netcat and curl for monitoring scripts
+RUN apt-get update && \
+    apt-get install -y socat netcat-openbsd curl && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Switch back to haproxy user for security
+USER haproxy
+
+# Expose every port haproxy.cfg binds: AMQPS, AMQP, management (TLS and plain), stats
+EXPOSE 5671 5672 15671 15672 8080
diff --git a/deploy/dockerephemeral/docker-compose.yaml b/deploy/dockerephemeral/docker-compose.yaml
index cd98c0ac92..7384ca9402 100644
--- a/deploy/dockerephemeral/docker-compose.yaml
+++ b/deploy/dockerephemeral/docker-compose.yaml
@@ -324,15 +324,53 @@ services:
       - RABBITMQ_USERNAME
       - RABBITMQ_PASSWORD
     ports:
-      - "127.0.0.1:5671:5671"
-      - "127.0.0.1:15671:15671"
-      - "127.0.0.1:15672:15672"
+      # Shift real RabbitMQ off standard ports; a proxy (haproxy below, or toxiproxy when enabled) listens on them
+      - "127.0.0.1:5673:5671"
+      - "127.0.0.1:5674:5672"
+      - "127.0.0.1:15674:15671"
+      - "127.0.0.1:15673:15672"
     volumes:
       - ./rabbitmq-config/rabbitmq.conf:/etc/rabbitmq/conf.d/20-wire.conf
       - ./rabbitmq-config/certificates:/etc/rabbitmq/certificates
     networks:
       - demo_wire
 
+  haproxy:
+    build:
+      context: .
+      dockerfile: Dockerfile.haproxy
+    container_name: haproxy
+    depends_on:
+      - rabbitmq
+    ports:
+      - "127.0.0.1:5671:5671" # RabbitMQ AMQP (TLS) via HAProxy; loopback only, like the other services
+      - "127.0.0.1:5672:5672" # RabbitMQ AMQP via HAProxy
+      - "127.0.0.1:15671:15671" # RabbitMQ management (TLS) via HAProxy
+      - "127.0.0.1:15672:15672" # RabbitMQ management via HAProxy
+      - "127.0.0.1:8081:8080" # HAProxy stats page (admin-enabled, so never publish it on all interfaces)
+    volumes:
+      - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro
+      - ./haproxy-logs:/var/log/haproxy
+    networks:
+      - demo_wire
+
+  # toxiproxy:
+  #   container_name: toxiproxy
+  #   image: ghcr.io/shopify/toxiproxy:2.9.0
+  #   command: -host=0.0.0.0 -config /config/toxiproxy.json
+  #   depends_on:
+  #     - rabbitmq
+  #   ports:
+  #     - "127.0.0.1:8474:8474" # Toxiproxy admin API
+  #     - "127.0.0.1:5671:5671" # RabbitMQ AMQP (TLS) via proxy
+  #     - "127.0.0.1:5672:5672" # RabbitMQ AMQP via proxy
+  #     - "127.0.0.1:15672:15672" # RabbitMQ management via proxy
+  #     - "127.0.0.1:15671:15671" # RabbitMQ management (TLS) via proxy
+  #   volumes:
+  #     - ./docker/toxiproxy.json:/config/toxiproxy.json:ro
+  #   networks:
+  #     - demo_wire
+
+
   init_vhosts:
     image: alpine/curl:3.14
     environment:
diff --git a/deploy/dockerephemeral/docker/toxiproxy.json b/deploy/dockerephemeral/docker/toxiproxy.json
new file mode 100644
index 0000000000..dd0611e61c
--- /dev/null
+++ b/deploy/dockerephemeral/docker/toxiproxy.json
@@ -0,0 +1,26 @@
+[
+  {
+    "name": "rabbitmq-amqp-tcp",
+    "listen": "0.0.0.0:5672",
+    "upstream": "rabbitmq:5672",
+    "enabled": true
+  },
+  {
+    "name": "rabbitmq-amqp-tls",
+    "listen": "0.0.0.0:5671",
+    "upstream": "rabbitmq:5671",
+    "enabled": true
+  },
+  {
+    "name": "rabbitmq-management",
+    "listen": "0.0.0.0:15672",
+    "upstream": "rabbitmq:15672",
+    "enabled": true
+  },
+  {
+    "name": "rabbitmq-management-tls",
+    "listen": "0.0.0.0:15671",
+    "upstream": "rabbitmq:15671",
+    "enabled": true
+  }
+]
diff --git a/deploy/dockerephemeral/haproxy.cfg b/deploy/dockerephemeral/haproxy.cfg
new file mode 100644
index 0000000000..cbbf5344a8
--- /dev/null
+++ b/deploy/dockerephemeral/haproxy.cfg
@@ -0,0 +1,120 @@
+global
+    log stdout local0
+    stats socket /var/lib/haproxy/stats mode 666 level admin expose-fd listeners
+    stats timeout 30s
+    user haproxy
+    group haproxy
+    daemon
+    tune.idletimer 30s
+    tune.http.cookielen 4096
+
+# Use Docker's embedded DNS for service discovery inside the compose network
+resolvers docker
+    nameserver dns1 127.0.0.11:53
+    resolve_retries 3
+    timeout resolve 1s
+    hold valid 10s
+
+defaults
+    mode tcp
+    log global
+    option tcplog
+    option dontlognull
+    retries 3
+    timeout connect 5000ms
+    timeout client 3600000ms # 1 hour
+    timeout server 3600000ms # 1 hour
+    timeout check 5000ms
+
+    option tcp-check
+
+
+listen stats
+    bind *:8080
+    mode http
+    stats enable
+    stats uri /stats
+    stats refresh 30s
+    stats admin if TRUE
+    stats auth guest:alpaca-grapefruit
+
+frontend rabbitmq_amqp
+    bind *:5672
+    mode tcp
+    option tcplog
+    default_backend rabbitmq_amqp_cluster
+
+    timeout client 3600000ms
+
+frontend rabbitmq_amqps
+    bind *:5671
+    mode tcp
+    option tcplog
+    default_backend rabbitmq_amqps_cluster
+
+    timeout client 3600000ms
+
+frontend rabbitmq_management
+    bind *:15672
+    mode http
+    option httplog
+    default_backend rabbitmq_management_cluster
+
+    timeout client 3600000ms
+
+frontend rabbitmq_management_tls
+    bind *:15671
+    mode tcp
+    option tcplog
+    default_backend rabbitmq_management_tls_cluster
+
+    timeout client 3600000ms
+
+backend rabbitmq_amqp_cluster
+    mode tcp
+    balance roundrobin
+
+    timeout server 3600000ms
+    timeout check 3000ms
+
+    option tcp-check
+    tcp-check connect
+
+    server rabbitmq rabbitmq:5672 check inter 10000ms fall 3 rise 2 resolvers docker resolve-prefer ipv4 init-addr last,libc,none
+
+backend rabbitmq_amqps_cluster
+    mode tcp
+    balance roundrobin
+
+    timeout server 3600000ms
+    timeout check 3000ms
+
+    option tcp-check
+    tcp-check connect
+
+    server rabbitmq-tls rabbitmq:5671 check inter 10000ms fall 3 rise 2 resolvers docker resolve-prefer ipv4 init-addr last,libc,none
+
+backend rabbitmq_management_cluster
+    mode http
+    balance roundrobin
+    option httpchk GET /api/overview
+    http-check send-state
+    http-check connect default
+    http-check send meth GET uri /api/overview hdr Authorization "Basic YWRtaW46cGFzc3dvcmQ=" # base64 of admin:password; NOTE(review): confirm it matches the broker credentials
+    http-check expect status 200
+
+    timeout server 3600000ms
+
+    server rabbitmq-mgmt rabbitmq:15672 check inter 10000ms fall 3 rise 2 resolvers docker resolve-prefer ipv4 init-addr last,libc,none
+
+backend rabbitmq_management_tls_cluster
+    mode tcp
+    balance roundrobin
+
+    timeout server 3600000ms
+    timeout check 3000ms
+
+    option tcp-check
+    tcp-check connect
+
+    server rabbitmq-mgmt-tls rabbitmq:15671 check inter 10000ms fall 3 rise 2 resolvers docker resolve-prefer ipv4 init-addr last,libc,none
diff --git a/deploy/dockerephemeral/toxiproxy-rabbitmq-terminate.sh b/deploy/dockerephemeral/toxiproxy-rabbitmq-terminate.sh
new file mode 100755
index 0000000000..1c7ec3193e
--- /dev/null
+++ b/deploy/dockerephemeral/toxiproxy-rabbitmq-terminate.sh
@@ -0,0 +1,229 @@
+#!/usr/bin/env bash
+# shellcheck disable=SC2034
+set -euo pipefail
+
+COMPOSE_FILE_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+COMPOSE_FILE="$COMPOSE_FILE_DIR/docker-compose.yaml"
+TOXIPROXY_SVC="toxiproxy"
+RABBITMQ_SVC="rabbitmq"
+
+# Primary proxy to target; adjust if you want to target others
+DEFAULT_PROXY="rabbitmq-amqp-tcp"
+
+curlx() { # curl wrapper: use the local curl binary if present, else run curl in the init_vhosts compose service
+  if command -v curl >/dev/null 2>&1; then
+    curl -sS "$@"
+  else
+    # Fallback to using the compose service with curl
+    docker compose -f "$COMPOSE_FILE" run --rm --no-deps --entrypoint curl init_vhosts -sS "$@"
+  fi
+}
+
+TOXIPROXY_API="${TOXIPROXY_API:-http://127.0.0.1:8474}"
+
+add_toxic() { # args: proxy name type stream attrs_json; returns 0 on a 2xx response, 1 otherwise
+  local proxy="$1" name="$2" type="$3" stream="$4" attrs_json="$5"
+  local resp code
+  resp=$(curlx -X POST -H 'Content-Type: application/json' \
+    --data "{\"name\":\"$name\",\"type\":\"$type\",\"stream\":\"$stream\",\"attributes\":$attrs_json}" \
+    -w "\n%{http_code}" "$TOXIPROXY_API/proxies/$proxy/toxics") || true
+  code="${resp##*$'\n'}" # last line of the output is the HTTP status appended by -w
+  local body="${resp%$'\n'*}" # everything before that last line is the response body
+  if [[ "$code" == 2* ]]; then
+    echo "Applied toxic '$name' ($type/$stream) -> HTTP $code"
+    echo "$body"
+    return 0
+  else
+    echo "Error adding toxic '$name' ($type/$stream) to proxy '$proxy':" >&2
+    echo "HTTP $code" >&2
+    echo "Response: $body" >&2
+    return 1
+  fi
+}
+
+# Enable/disable proxy (fallback to simulate abrupt disconnect)
+set_proxy_enabled() { # args: proxy enabled(true|false); returns 1 on a non-2xx response
+  local proxy="$1" enabled="$2" resp code
+  resp=$(curlx -X POST -H 'Content-Type: application/json' \
+    --data "{\"enabled\":$enabled}" \
+    -w "\n%{http_code}" "$TOXIPROXY_API/proxies/$proxy") || true
+  code="${resp##*$'\n'}"
+  local body="${resp%$'\n'*}"
+  echo "Toggled proxy '$proxy' enabled=$enabled -> HTTP $code"
+  echo "$body"
+  if [[ "$code" != 2* ]]; then
+    echo "Error toggling proxy state." >&2
+    return 1
+  fi
+}
+
+remove_toxic() { # args: proxy name; always returns 0
+  local proxy="$1" name="$2" resp code body
+  resp=$(curlx -X DELETE -w "\n%{http_code}" "$TOXIPROXY_API/proxies/$proxy/toxics/$name") || true
+  code="${resp##*$'\n'}"
+  body="${resp%$'\n'*}"
+  echo "Remove toxic '$name' from proxy '$proxy' -> HTTP $code"
+  [ -n "$body" ] && echo "$body"
+  # don't fail hard on non-2xx; cleanup is best-effort
+  return 0
+}
+
+cleanup_toxics() { # removes every toxic name this script can create, including the handshake-choke (hs_*) and heartbeat-killer (hb_*) ones
+  local proxy="$1" name
+  for name in down force_rst_up force_rst_down blackhole_up blackhole_down odd_fin_up odd_fin_down odd_slow_close hs_slice_dn hs_bw_dn hs_jit_dn hb_lat_dn hb_lat_up; do
+    remove_toxic "$proxy" "$name"
+  done
+}
+
+prompt_proxy() { # asks for a proxy name on stdin and prints it; empty input or EOF selects DEFAULT_PROXY
+  local proxy
+  read -r -p "Proxy to target [${DEFAULT_PROXY}]: " proxy || proxy=""
+  proxy=${proxy:-$DEFAULT_PROXY}
+  echo "$proxy"
+}
+
+echo "Choose termination mode:"
+echo " 1) Abrupt RST (reset_peer)"
+echo " 2) Silent black hole (timeout both directions)"
+echo " 3) Odd/graceful FIN at weird moment (limit_data + slow_close)"
+echo " 4) Graceful termination (RabbitMQ close_all_connections)"
+echo " 5) down"
+echo " 6) Handshake choke (slice+bandwidth+jitter)"
+echo " 7) Heartbeat killer (high latency both directions)"
+echo " 8) Intermittent flaps (toggle proxy on/off)"
+echo
+read -r -p "Enter choice [1-8]: " choice
+
+case "${choice}" in
+  1)
+    proxy=$(prompt_proxy)
+    echo "Applying Abrupt RST on proxy '${proxy}'..."
+    cleanup_toxics "$proxy" # ensure clean slate
+    # Sends TCP RST to peer immediately (both directions)
+    if add_toxic "$proxy" force_rst_down reset_peer downstream '{}' && \
+      add_toxic "$proxy" force_rst_up reset_peer upstream '{}'; then
+      echo "RST toxic applied. Press Enter to remove it and restore normal traffic."
+      read -r _
+      cleanup_toxics "$proxy"
+    else # NOTE(review): if only the first toxic was applied, it is not removed before this fallback
+      echo "reset_peer toxic not supported; falling back to disabling proxy (abrupt disconnect)." >&2
+      set_proxy_enabled "$proxy" false || { echo "Failed to disable proxy." >&2; exit 1; }
+      echo "Proxy disabled. Existing connections should drop immediately. Press Enter to re-enable."
+      read -r _
+      set_proxy_enabled "$proxy" true || { echo "Failed to re-enable proxy." >&2; exit 1; }
+    fi
+    ;;
+  2)
+    proxy=$(prompt_proxy)
+    echo "Applying Silent Black Hole on proxy '${proxy}'..."
+    cleanup_toxics "$proxy" # ensure clean slate
+    # timeout toxic with timeout=0 drops all traffic indefinitely (blackhole)
+    if add_toxic "$proxy" blackhole_down timeout downstream '{"timeout":0}' && \
+      add_toxic "$proxy" blackhole_up timeout upstream '{"timeout":0}'; then
+      echo "Blackhole toxics applied. Press Enter to remove them and restore normal traffic."
+      read -r _
+      cleanup_toxics "$proxy"
+    else
+      echo "Failed to apply blackhole toxics." >&2
+      cleanup_toxics "$proxy"
+      exit 1
+    fi
+    ;;
+  3)
+    proxy=$(prompt_proxy)
+    echo "Applying Odd/Graceful FIN on proxy '${proxy}'..."
+    cleanup_toxics "$proxy" # ensure clean slate
+    # Limit bytes to force mid-stream termination then close slowly (FIN)
+    if add_toxic "$proxy" odd_fin_down limit_data downstream '{"bytes":64}' && \
+      add_toxic "$proxy" odd_fin_up limit_data upstream '{"bytes":64}' && \
+      add_toxic "$proxy" odd_slow_close slow_close downstream '{"delay":1000}'; then
+      echo "Odd FIN toxics applied. Press Enter to remove them and restore normal traffic."
+      read -r _
+      cleanup_toxics "$proxy"
+    else
+      echo "Failed to apply odd FIN toxics." >&2
+      cleanup_toxics "$proxy"
+      exit 1
+    fi
+    ;;
+  4)
+    echo "Gracefully closing all RabbitMQ connections via rabbitmqctl..."
+    # This requests the broker to gracefully close connections (FIN)
+    docker compose -f "$COMPOSE_FILE" exec -T "$RABBITMQ_SVC" \
+      rabbitmqctl close_all_connections "Closed by toxiproxy-rabbitmq-terminate.sh" || true
+    echo "Requested graceful termination on the broker."
+    ;;
+  5)
+    echo "down"
+    proxy=$(prompt_proxy)
+    echo "Taking proxy '${proxy}' down..."
+    cleanup_toxics "$proxy" # ensure clean slate
+    # "down" is not a Toxiproxy toxic type; a proxy is taken down by setting enabled=false on it
+    if set_proxy_enabled "$proxy" false; then
+      echo "Proxy is down. Press Enter to re-enable it and restore normal traffic."
+      read -r _
+      set_proxy_enabled "$proxy" true || { echo "Failed to re-enable proxy." >&2; exit 1; }
+    else
+      echo "Failed to take proxy down." >&2
+      cleanup_toxics "$proxy"
+      exit 1
+    fi
+    ;;
+  6)
+    proxy=$(prompt_proxy)
+    echo "Applying handshake choke on proxy '${proxy}'..."
+    cleanup_toxics "$proxy"
+    if add_toxic "$proxy" hs_slice_dn slicer downstream '{"average_size":8,"size_variation":5,"delay":5}' && \
+      add_toxic "$proxy" hs_bw_dn bandwidth downstream '{"rate":1500}' && \
+      add_toxic "$proxy" hs_jit_dn latency downstream '{"latency":80,"jitter":60,"correlation":0.3}'; then
+      echo "Handshake choke applied. Press Enter to remove and restore traffic."
+      read -r _
+      cleanup_toxics "$proxy"
+    else
+      echo "Failed to apply handshake choke toxics." >&2
+      cleanup_toxics "$proxy"
+      exit 1
+    fi
+    ;;
+  7)
+    proxy=$(prompt_proxy)
+    echo "Applying heartbeat killer (latency both directions) on proxy '${proxy}'..."
+    cleanup_toxics "$proxy"
+    if add_toxic "$proxy" hb_lat_dn latency downstream '{"latency":65000,"jitter":15000,"correlation":0.2}' && \
+      add_toxic "$proxy" hb_lat_up latency upstream '{"latency":65000,"jitter":15000,"correlation":0.2}'; then
+      echo "Heartbeat killer applied. Press Enter to remove and restore traffic."
+      read -r _
+      cleanup_toxics "$proxy"
+    else
+      echo "Failed to apply heartbeat killer toxics." >&2
+      cleanup_toxics "$proxy"
+      exit 1
+    fi
+    ;;
+  8)
+    proxy=$(prompt_proxy)
+    read -r -p "Flap duration seconds [5]: " flap_dur
+    read -r -p "Flap cycles [3]: " flap_cycles
+    flap_dur=${flap_dur:-5} # empty input selects 5 seconds
+    flap_cycles=${flap_cycles:-3} # empty input selects 3 cycles
+    echo "Flapping proxy '${proxy}' ${flap_cycles}x with ${flap_dur}s intervals..."
+    i=1
+    while [ "$i" -le "$flap_cycles" ]; do
+      echo "Cycle $i: disabling..."
+      set_proxy_enabled "$proxy" false || { echo "Failed to disable proxy." >&2; break; }
+      sleep "$flap_dur"
+      echo "Cycle $i: enabling..."
+      set_proxy_enabled "$proxy" true || { echo "Failed to enable proxy." >&2; break; }
+      sleep "$flap_dur"
+      i=$((i+1))
+    done
+    echo "Ensuring proxy is enabled..."
+    set_proxy_enabled "$proxy" true || true # best-effort: runs even when the loop broke out early
+    ;;
+  *)
+    echo "Invalid choice. Exiting." >&2
+    exit 1
+    ;;
+esac
+
+echo "Done."
diff --git a/integration/test/Test/Demo.hs b/integration/test/Test/Demo.hs
index 8d93872208..3bd0699bd7 100644
--- a/integration/test/Test/Demo.hs
+++ b/integration/test/Test/Demo.hs
@@ -7,10 +7,14 @@ import qualified API.Brig as BrigP
 import qualified API.BrigInternal as BrigI
 import qualified API.GalleyInternal as GalleyI
 import qualified API.Nginz as Nginz
+import Control.Concurrent
 import GHC.Stack
+import MLS.Util
+import Notifications (isMemberJoinNotif, isNewMLSMessageNotif)
 import SetupHelpers
 import Testlib.Prelude
 import Testlib.VersionedFed
+import Prelude (getLine)
 
 -- | Deleting unknown clients should fail with 404.
 testDeleteUnknownClient :: (HasCallStack) => App ()
@@ -215,3 +219,28 @@ testLegacyFedFederationV2 fedDomainV2 = do
 
   bob' <- BrigP.getUser alice bob >>= getJSON 200
   bob' %. "qualified_id" `shouldMatch` (bob %. "qualified_id")
+
+testRabbitMQConnection :: (HasCallStack) => App () -- NOTE(review): manual test; it blocks on stdin until the operator presses enter
+testRabbitMQConnection = do
+  [alice, bob] <- createUsers [OwnDomain, OtherDomain]
+  connectTwoUsers alice bob
+  clients@[alice1, _] <- traverse (createMLSClient def) [alice, bob]
+  for_ clients (uploadNewKeyPackage def)
+  convId <- createNewGroup def alice1
+
+  withWebSockets [alice, bob] $ \wss@[_, bobWs] -> do
+    void $ createAddCommit alice1 convId [alice, bob] >>= sendAndConsumeCommitBundle
+    for_ wss (awaitMatch isMemberJoinNotif)
+
+    void $ createApplicationMessage convId alice1 "test" >>= sendAndConsumeMessage -- baseline message before the outage
+    void $ awaitMatch isNewMLSMessageNotif bobWs
+
+    putStrLn "simulate RabbitMQ outage"
+    putStrLn "e.g.: docker stop rabbitmq"
+    putStrLn "and: docker start rabbitmq"
+    putStrLn "wait a bit for reconnection then press enter to continue..."
+    _ <- liftIO getLine -- waits for the operator; never returns if nothing is typed on stdin
+
+    void $ createApplicationMessage convId alice1 "test" >>= sendAndConsumeMessage -- same send/await again after the operator continues
+    void $ awaitMatch isNewMLSMessageNotif bobWs
+    liftIO $ threadDelay 2_000_000 -- threadDelay takes microseconds, so this is a 2 second pause
diff --git a/integration/test/Testlib/ModService.hs b/integration/test/Testlib/ModService.hs
index 9808d9b04e..cdd6d74f44 100644
--- a/integration/test/Testlib/ModService.hs
+++ b/integration/test/Testlib/ModService.hs
@@ -249,7 +249,7 @@ startDynamicBackend resource beOverrides = do
             galleyCfg = setField "logLevel" ("Warn" :: String),
             gundeckCfg = setField "logLevel" ("Warn" :: String),
             nginzCfg = setField "logLevel" ("Warn" :: String),
-            backgroundWorkerCfg = setField "logLevel" ("Warn" :: String),
+            backgroundWorkerCfg = setField "logLevel" ("Info" :: String), -- NOTE(review): the other services shown here stay at Warn; confirm Info is meant to be permanent
             sternCfg = setField "logLevel" ("Warn" :: String),
             federatorInternalCfg = setField "logLevel" ("Warn" :: String),
             wireProxyCfg = setField "logLevel" ("Warn" :: String)
diff --git a/integration/test/Testlib/Run.hs b/integration/test/Testlib/Run.hs
index 8143dc50a0..c6a5dc9fc2 100644
--- a/integration/test/Testlib/Run.hs
+++ b/integration/test/Testlib/Run.hs
@@ -196,23 +196,24 @@ runMigrations = do
   void $ liftIO $ waitForProcess ph
 
 deleteFederationV0AndV1Queues :: GlobalEnv -> IO ()
-deleteFederationV0AndV1Queues env = do
-  let testDomains = env.gDomain1 : env.gDomain2 : env.gDynamicDomains
-  putStrLn "Attempting to delete federation V0 queues..."
-  (mV0User, mV0Pass) <- readCredsFromEnvWithSuffix "V0"
-  fromMaybe (putStrLn "No or incomplete credentials for fed V0 RabbitMQ") $
-    deleteFederationQueues testDomains env.gRabbitMQConfigV0 <$> mV0User <*> mV0Pass
-
-  putStrLn "Attempting to delete federation V1 queues..."
-  (mV1User, mV1Pass) <- readCredsFromEnvWithSuffix "V1"
-  fromMaybe (putStrLn "No or incomplete credentials for fed V1 RabbitMQ") $
-    deleteFederationQueues testDomains env.gRabbitMQConfigV1 <$> mV1User <*> mV1Pass
-  where
-    readCredsFromEnvWithSuffix :: String -> IO (Maybe Text, Maybe Text)
-    readCredsFromEnvWithSuffix suffix =
-      (,)
-        <$> (fmap fromString <$> lookupEnv ("RABBITMQ_USERNAME_" <> suffix))
-        <*> (fmap fromString <$> lookupEnv ("RABBITMQ_PASSWORD_" <> suffix))
+deleteFederationV0AndV1Queues _env = do
+  pure () -- NOTE(review): now a no-op, so no V0/V1 federation queues are deleted; the previous body is kept below as comments
+  -- let testDomains = env.gDomain1 : env.gDomain2 : env.gDynamicDomains
+  -- putStrLn "Attempting to delete federation V0 queues..."
+  -- (mV0User, mV0Pass) <- readCredsFromEnvWithSuffix "V0"
+  -- fromMaybe (putStrLn "No or incomplete credentials for fed V0 RabbitMQ") $
+  --   deleteFederationQueues testDomains env.gRabbitMQConfigV0 <$> mV0User <*> mV0Pass
+
+  -- putStrLn "Attempting to delete federation V1 queues..."
+  -- (mV1User, mV1Pass) <- readCredsFromEnvWithSuffix "V1"
+  -- fromMaybe (putStrLn "No or incomplete credentials for fed V1 RabbitMQ") $
+  --   deleteFederationQueues testDomains env.gRabbitMQConfigV1 <$> mV1User <*> mV1Pass
+  -- where
+  --   readCredsFromEnvWithSuffix :: String -> IO (Maybe Text, Maybe Text)
+  --   readCredsFromEnvWithSuffix suffix =
+  --     (,)
+  --       <$> (fmap fromString <$> lookupEnv ("RABBITMQ_USERNAME_" <> suffix))
+  --       <*> (fmap fromString <$> lookupEnv ("RABBITMQ_PASSWORD_" <> suffix))
 
 deleteFederationQueues :: [String] -> RabbitMqAdminOpts -> Text -> Text -> IO ()
 deleteFederationQueues testDomains opts username password = do