diff --git a/.github/workflows/docker_image.yml b/.github/workflows/docker_image.yml index 54157ace6cf2..b1c995771471 100644 --- a/.github/workflows/docker_image.yml +++ b/.github/workflows/docker_image.yml @@ -9,7 +9,7 @@ permissions: contents: read jobs: - build-and-push: + build-and-push-base: runs-on: ubuntu-latest-16-cores timeout-minutes: 40 @@ -28,11 +28,63 @@ jobs: credentials_json: ${{ secrets.GCP_SA_ACTIONS_RUNNER_KEY }} - name: Set up Google Cloud SDK uses: google-github-actions/setup-gcloud@v1 - + + - name: Authenticate Docker with GCP + run: | + gcloud auth configure-docker us-docker.pkg.dev + + - name: Set env variables + run: | + echo "GIT_COMMIT_SHORT=${GITHUB_SHA:0:7}" >> $GITHUB_ENV + echo "GIT_COMMIT_LONG=${GITHUB_SHA}" >> $GITHUB_ENV + if [ -n "${{ github.head_ref }}" ]; then + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + else + echo "BRANCH_NAME=${{ github.ref_name }}" >> $GITHUB_ENV + fi + + - name: Set docker image env variable + run: | + echo "DOCKER_IMAGE=us-docker.pkg.dev/linera-io-dev/linera-public-registry/linera" >> $GITHUB_ENV + + - name: Build base Docker image + run: | + docker build --build-arg git_commit=${{ env.GIT_COMMIT_LONG }} \ + -f docker/Dockerfile . 
\ + -t ${{ env.DOCKER_IMAGE }}:${{ env.BRANCH_NAME }}_base \ + -t ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_SHORT }}_base \ + -t ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_LONG }}_base + + - name: Push base Docker image to Google Artifact Registry + run: | + docker push ${{ env.DOCKER_IMAGE }}:${{ env.BRANCH_NAME }}_base + docker push ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_SHORT }}_base + docker push ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_LONG }}_base + + build-and-push-full: + runs-on: ubuntu-latest-16-cores + timeout-minutes: 40 + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + - name: Install Protoc + uses: arduino/setup-protoc@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Auth service account + uses: google-github-actions/auth@v1 + with: + credentials_json: ${{ secrets.GCP_SA_ACTIONS_RUNNER_KEY }} + - name: Set up Google Cloud SDK + uses: google-github-actions/setup-gcloud@v1 + - name: Authenticate Docker with GCP run: | gcloud auth configure-docker us-docker.pkg.dev - + - name: Set env variables run: | echo "GIT_COMMIT_SHORT=${GITHUB_SHA:0:7}" >> $GITHUB_ENV @@ -46,17 +98,18 @@ jobs: - name: Set docker image env variable run: | echo "DOCKER_IMAGE=us-docker.pkg.dev/linera-io-dev/linera-public-registry/linera" >> $GITHUB_ENV - - - name: Build Docker image + + - name: Build full Docker image run: | docker build --build-arg git_commit=${{ env.GIT_COMMIT_LONG }} \ + --build-arg additional_features=tempo,memory-profiling \ -f docker/Dockerfile . 
\ - -t ${{ env.DOCKER_IMAGE }}:${{ env.BRANCH_NAME }} \ - -t ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_SHORT }} \ - -t ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_LONG }} - - - name: Push Docker image to Google Artifact Registry - run: | - docker push ${{ env.DOCKER_IMAGE }}:${{ env.BRANCH_NAME }} - docker push ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_SHORT }} - docker push ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_LONG }} + -t ${{ env.DOCKER_IMAGE }}:${{ env.BRANCH_NAME }}_full \ + -t ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_SHORT }}_full \ + -t ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_LONG }}_full + + - name: Push full Docker image to Google Artifact Registry + run: | + docker push ${{ env.DOCKER_IMAGE }}:${{ env.BRANCH_NAME }}_full + docker push ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_SHORT }}_full + docker push ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_LONG }}_full diff --git a/CLI.md b/CLI.md index d26be734cf2e..71decda2a32a 100644 --- a/CLI.md +++ b/CLI.md @@ -144,6 +144,9 @@ Client implementation and command-line tool for the Linera blockchain * `--max-retries ` — Number of times to retry connecting to a validator Default value: `10` +* `--chrome-trace-exporter` — Enable OpenTelemetry Chrome JSON exporter for trace data analysis +* `--otel-trace-file ` — Output file path for Chrome trace JSON format. 
Can be visualized in chrome://tracing or Perfetto UI +* `--otel-exporter-otlp-endpoint ` — OpenTelemetry OTLP exporter endpoint (requires tempo feature) * `--wait-for-outgoing-messages` — Whether to wait until a quorum of validators has confirmed that all sent cross-chain messages have been delivered * `--long-lived-services` — (EXPERIMENTAL) Whether application services can persist in some cases between queries * `--blanket-message-policy ` — The policy for handling incoming messages diff --git a/Cargo.lock b/Cargo.lock index c68cb913f0c4..6bbe691d4b68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5196,6 +5196,7 @@ dependencies = [ "serde_bytes", "serde_json", "serde_with", + "tempfile", "test-case", "test-strategy", "thiserror 1.0.69", @@ -5203,6 +5204,7 @@ dependencies = [ "tokio-stream", "tokio-util", "tracing", + "tracing-chrome", "tracing-opentelemetry", "tracing-subscriber 0.3.19", "tracing-web", @@ -10604,6 +10606,17 @@ dependencies = [ "syn 2.0.105", ] +[[package]] +name = "tracing-chrome" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf0a738ed5d6450a9fb96e86a23ad808de2b727fd1394585da5cdd6788ffe724" +dependencies = [ + "serde_json", + "tracing-core", + "tracing-subscriber 0.3.19", +] + [[package]] name = "tracing-core" version = "0.1.34" @@ -10675,9 +10688,11 @@ dependencies = [ "serde", "serde_json", "sharded-slab", + "smallvec", "thread_local", "tracing", "tracing-core", + "tracing-log", "tracing-serde", ] diff --git a/Cargo.toml b/Cargo.toml index cd5c3a80b3cb..d6a6385079af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -259,6 +259,7 @@ tonic-web-wasm-client = "0.6.0" tower = "0.4.13" tower-http = "0.6.6" tracing = { version = "0.1.40", features = ["release_max_level_debug"] } +tracing-chrome = "0.7.2" tracing-opentelemetry = "0.31.0" tracing-subscriber = { version = "0.3.18", default-features = false, features = [ "env-filter", diff --git a/docker/Dockerfile b/docker/Dockerfile index 
b479cf3cf5a7..e5d955efada7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -25,7 +25,8 @@ ARG binaries= ARG copy=${binaries:+_copy} ARG build_flag=--release ARG build_folder=release -ARG build_features=scylladb,metrics,memory-profiling,tempo +ARG build_features=scylladb,metrics +ARG additional_features= ARG rustflags="-C force-frame-pointers=yes" FROM rust:1.74-slim-bookworm AS builder @@ -34,6 +35,7 @@ ARG target ARG build_flag ARG build_folder ARG build_features +ARG additional_features ARG rustflags RUN apt-get update && apt-get install -y \ @@ -74,12 +76,17 @@ COPY rust-toolchain* Cargo.* ./ ENV GIT_COMMIT=${git_commit} ENV RUSTFLAGS=${rustflags} -RUN cargo build ${build_flag:+"$build_flag"} \ +RUN if [ -n "$additional_features" ]; then \ + features="$build_features,$additional_features"; \ + else \ + features="$build_features"; \ + fi && \ + cargo build ${build_flag:+"$build_flag"} \ --target "$target" \ --bin linera \ --bin linera-proxy \ --bin linera-server \ - --features $build_features + --features "$features" RUN mv \ target/"$target"/"$build_folder"/linera \ @@ -119,13 +126,3 @@ COPY --from=binaries \ linera-server \ linera-proxy \ ./ - -COPY --chmod=755 \ - docker/server-entrypoint.sh \ - docker/server-init.sh \ - docker/proxy-entrypoint.sh \ - docker/proxy-init.sh \ - docker/compose-server-entrypoint.sh \ - docker/compose-proxy-entrypoint.sh \ - docker/compose-server-init.sh \ - ./ diff --git a/docker/compose-proxy-entrypoint.sh b/docker/compose-proxy-entrypoint.sh deleted file mode 100755 index c68c4258f0fb..000000000000 --- a/docker/compose-proxy-entrypoint.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -storage_replication_factor=$1 - -exec ./linera-proxy \ - --storage scylladb:tcp:scylla:9042 \ - --storage-replication-factor $storage_replication_factor \ - /config/server.json diff --git a/docker/compose-server-entrypoint.sh b/docker/compose-server-entrypoint.sh deleted file mode 100755 index 5fcbdcbdbc0d..000000000000 --- 
a/docker/compose-server-entrypoint.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -storage=$1 -storage_replication_factor=$2 - -exec ./linera-server run \ - --storage $storage \ - --server /config/server.json \ - --shard 0 \ - --storage-replication-factor $storage_replication_factor diff --git a/docker/compose-server-init.sh b/docker/compose-server-init.sh deleted file mode 100755 index 6d51a220bac5..000000000000 --- a/docker/compose-server-init.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh - -storage=$1 - -while true; do - ./linera storage check-existence --storage $storage - status=$? - - if [ $status -eq 0 ]; then - echo "Database already exists, no need to initialize." - exit 0 - elif [ $status -eq 1 ]; then - echo "Database does not exist, attempting to initialize..." - if ./linera storage initialize --storage $storage --genesis /config/genesis.json - then - echo "Initialization successful." - exit 0 - else - echo "Initialization failed, retrying in 5 seconds..." - sleep 5 - fi - else - echo "An unexpected error occurred (status: $status), retrying in 5 seconds..." 
- sleep 5 - fi -done diff --git a/docker/docker-compose.indexer-test.yml b/docker/docker-compose.indexer-test.yml index ffe35c91f23b..d0cfed97f8a0 100644 --- a/docker/docker-compose.indexer-test.yml +++ b/docker/docker-compose.indexer-test.yml @@ -6,7 +6,7 @@ services: ports: - "${LINERA_STORAGE_SERVICE_PORT:-1235}:${LINERA_STORAGE_SERVICE_PORT:-1235}" command: > - sh -c "./linera-storage-server memory --endpoint 0.0.0.0:${LINERA_STORAGE_SERVICE_PORT:-1235}" + ./linera-storage-server memory --endpoint 0.0.0.0:${LINERA_STORAGE_SERVICE_PORT:-1235} environment: - LINERA_STORAGE_SERVICE_PORT=${LINERA_STORAGE_SERVICE_PORT:-1235} healthcheck: @@ -51,10 +51,10 @@ services: - "${BLOCK_EXPORTER_PORT:-8882}:${BLOCK_EXPORTER_PORT:-8882}" - "${METRICS_PORT:-9091}:${METRICS_PORT:-9091}" command: > - sh -c "./linera-exporter - --storage service:tcp:linera-storage-service:${LINERA_STORAGE_SERVICE_PORT:-1235}:linera_storage_service_server_0_db - --config-path /exporter-config.toml - --metrics-port ${METRICS_PORT:-9091}" + ./linera-exporter + --storage service:tcp:linera-storage-service:${LINERA_STORAGE_SERVICE_PORT:-1235}:linera_storage_service_server_0_db + --config-path /exporter-config.toml + --metrics-port ${METRICS_PORT:-9091} environment: - LINERA_STORAGE_SERVICE_PORT=${LINERA_STORAGE_SERVICE_PORT:-1235} - BLOCK_EXPORTER_PORT=${BLOCK_EXPORTER_PORT:-8882} @@ -82,14 +82,14 @@ services: - "${FAUCET_PORT:-8080}:${FAUCET_PORT:-8080}" - "13001:13001" command: > - sh -c "./linera net + ./linera net --storage service:tcp:linera-storage-service:${LINERA_STORAGE_SERVICE_PORT:-1235}:linera_storage_service up --with-faucet --faucet-port ${FAUCET_PORT:-8080} --with-block-exporter --exporter-address linera-block-exporter - --exporter-port ${BLOCK_EXPORTER_PORT:-8882}" + --exporter-port ${BLOCK_EXPORTER_PORT:-8882} environment: - RUST_LOG=linera=info - LINERA_STORAGE_SERVICE_PORT=${LINERA_STORAGE_SERVICE_PORT:-1235} diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 
db8764151a5f..4e5fb4a18567 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -47,7 +47,14 @@ services:
     container_name: proxy
     ports:
       - "19100:19100"
-    command: ["./compose-proxy-entrypoint.sh", "1"]
+    command:
+      - "sh"
+      - "-c"
+      - |
+        exec ./linera-proxy \
+          --storage scylladb:tcp:scylla:9042 \
+          --storage-replication-factor 1 \
+          /config/server.json
     volumes:
       - .:/config
     labels:
@@ -60,7 +67,15 @@ services:
     image: "${LINERA_IMAGE:-us-docker.pkg.dev/linera-io-dev/linera-public-registry/linera:testnet_conway_release}"
     deploy:
       replicas: 4
-    command: ["./compose-server-entrypoint.sh", "scylladb:tcp:scylla:9042", "1"]
+    command:
+      - "sh"
+      - "-c"
+      - |
+        exec ./linera-server run \
+          --storage scylladb:tcp:scylla:9042 \
+          --server /config/server.json \
+          --shard 0 \
+          --storage-replication-factor 1
     volumes:
       - .:/config
     labels:
@@ -72,7 +87,32 @@ services:
   shard-init:
     image: "${LINERA_IMAGE:-us-docker.pkg.dev/linera-io-dev/linera-public-registry/linera:testnet_conway_release}"
     container_name: shard-init
-    command: ["./compose-server-init.sh", "scylladb:tcp:scylla:9042", "1"]
+    # "$$" is Compose's escape for a literal "$": without it Compose interpolates $? / $status itself before sh runs.
+    command:
+      - "sh"
+      - "-c"
+      - |
+        while true; do
+          ./linera storage check-existence --storage scylladb:tcp:scylla:9042
+          status=$$?
+
+          if [ $$status -eq 0 ]; then
+            echo "Database already exists, no need to initialize."
+            exit 0
+          elif [ $$status -eq 1 ]; then
+            echo "Database does not exist, attempting to initialize..."
+            if ./linera storage initialize --storage scylladb:tcp:scylla:9042 --genesis /config/genesis.json; then
+              echo "Initialization successful."
+              exit 0
+            else
+              echo "Initialization failed, retrying in 5 seconds..."
+              sleep 5
+            fi
+          else
+            echo "An unexpected error occurred (status: $$status), retrying in 5 seconds..."
+            sleep 5
+          fi
+        done
     volumes:
       - .:/config
     depends_on:
diff --git a/docker/proxy-entrypoint.sh b/docker/proxy-entrypoint.sh
deleted file mode 100644
index 90b82b5491b6..000000000000
--- a/docker/proxy-entrypoint.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-storage_replication_factor=$1
-ORDINAL="${HOSTNAME##*-}"
-
-exec ./linera-proxy \
-  --storage scylladb:tcp:scylla-client.scylla.svc.cluster.local:9042 \
-  --storage-replication-factor $storage_replication_factor \
-  --id "$ORDINAL" \
-  /config/server.json
diff --git a/docker/proxy-init.sh b/docker/proxy-init.sh
deleted file mode 100644
index e2f40110411e..000000000000
--- a/docker/proxy-init.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/sh
-
-while true; do
-  ./linera storage check-existence --storage "scylladb:tcp:scylla-client.scylla.svc.cluster.local:9042"
-  status=$?
-
-  if [ "$status" -eq 0 ]; then
-    echo "Database already exists, no need to initialize."
-    exit 0
-  else
-    # We rely on the shards to initialize the database, so just wait here
-    if [ "$status" -eq 1 ]; then
-      echo "Database does not exist, retrying in 5 seconds..."
-    else
-      echo "An unexpected error occurred (status: $status), retrying in 5 seconds..."
- fi - sleep 5 - fi -done diff --git a/docker/server-entrypoint.sh b/docker/server-entrypoint.sh deleted file mode 100644 index 803748ebedb2..000000000000 --- a/docker/server-entrypoint.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh - -storage=$1 -storage_replication_factor=$2 - -# Extract the ordinal number from the pod hostname -ORDINAL="${HOSTNAME##*-}" - -exec ./linera-server run \ - --storage $storage \ - --server /config/server.json \ - --shard $ORDINAL \ - --storage-replication-factor $storage_replication_factor diff --git a/docker/server-init.sh b/docker/server-init.sh deleted file mode 100644 index b80978e003fa..000000000000 --- a/docker/server-init.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -storage=$1 -storage_replication_factor=$2 - -while true; do - ./linera storage check-existence --storage $storage - status=$? - - if [ $status -eq 0 ]; then - echo "Database already exists, no need to initialize." - exit 0 - elif [ $status -eq 1 ]; then - echo "Database does not exist, attempting to initialize..." - if ./linera storage initialize \ - --storage $storage \ - --genesis /config/genesis.json \ - --storage-replication-factor $storage_replication_factor; then - echo "Initialization successful." - exit 0 - else - echo "Initialization failed, retrying in 5 seconds..." - sleep 5 - fi - else - echo "An unexpected error occurred (status: $status), retrying in 5 seconds..." 
- sleep 5 - fi -done diff --git a/examples/Cargo.lock b/examples/Cargo.lock index b1f37829287f..7cbc5c9031cd 100644 --- a/examples/Cargo.lock +++ b/examples/Cargo.lock @@ -3680,6 +3680,8 @@ dependencies = [ "is-terminal", "k256", "linera-witty", + "opentelemetry", + "opentelemetry_sdk", "port-selector", "prometheus", "proptest", @@ -3697,6 +3699,8 @@ dependencies = [ "tokio-stream", "tokio-util", "tracing", + "tracing-chrome", + "tracing-opentelemetry", "tracing-subscriber", "trait-variant", "zstd", @@ -4576,6 +4580,38 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e296cf87e61c9cfc1a61c3c63a0f7f286ed4554e0e22be84e8a38e1d264a2a29" +[[package]] +name = "opentelemetry" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaf416e4cb72756655126f7dd7bb0af49c674f4c1b9903e80c009e0c37e552e6" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.9", + "tracing", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f644aa9e5e31d11896e024305d7e3c98a88884d9f8919dbf37a9991bc47a4b" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.0", + "serde_json", + "thiserror 2.0.9", + "tokio", + "tokio-stream", +] + [[package]] name = "overload" version = "0.1.1" @@ -6671,6 +6707,17 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "tracing-chrome" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf0a738ed5d6450a9fb96e86a23ad808de2b727fd1394585da5cdd6788ffe724" +dependencies = [ + "serde_json", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "tracing-core" version = "0.1.32" @@ -6693,6 +6740,35 @@ dependencies = [ "tracing", ] +[[package]] +name = "tracing-log" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddcf5959f39507d0d04d6413119c04f33b623f4f951ebcbdddddfad2d0623a9c" +dependencies = [ + "js-sys", + "once_cell", + "opentelemetry", + "opentelemetry_sdk", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time", +] + [[package]] name = "tracing-serde" version = "0.1.3" @@ -6716,9 +6792,11 @@ dependencies = [ "serde", "serde_json", "sharded-slab", + "smallvec", "thread_local", "tracing", "tracing-core", + "tracing-log", "tracing-serde", ] @@ -7103,6 +7181,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" version = "0.25.4" diff --git a/kubernetes/linera-validator/templates/proxy.yaml b/kubernetes/linera-validator/templates/proxy.yaml index a097c03b0741..a7c32accbdbc 100644 --- a/kubernetes/linera-validator/templates/proxy.yaml +++ b/kubernetes/linera-validator/templates/proxy.yaml @@ -55,7 +55,12 @@ spec: - name: linera-proxy-initializer image: {{ .Values.lineraImage }} imagePullPolicy: {{ .Values.lineraImagePullPolicy }} - command: ["./proxy-init.sh"] + command: + - "./linera" + - "storage" + - "check-existence" + - "--storage" + - "scylladb:tcp:scylla-client.scylla.svc.cluster.local:9042" env: - name: RUST_LOG value: {{ .Values.logLevel }} @@ -70,10 +75,25 @@ spec: name: linera-port - containerPort: 20100 name: private-port - command: ["./proxy-entrypoint.sh", {{ .Values.storageReplicationFactor | quote }}] + command: 
+            - "./linera-proxy"
+            - "--storage"
+            - "scylladb:tcp:scylla-client.scylla.svc.cluster.local:9042"
+            - "--storage-replication-factor"
+            - {{ .Values.storageReplicationFactor | quote }}
+            - "--id"
+            - "$(POD_ORDINAL)"
+            - "--memory-profiling"
+            - "--otel-exporter-otlp-endpoint"
+            - "http://tempo.tempo.svc.cluster.local:4317"
+            - "/config/server.json"
           env:
             - name: RUST_LOG
               value: {{ .Values.logLevel }}
+            - name: POD_ORDINAL
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.labels['apps.kubernetes.io/pod-index']  # ordinal label set by the StatefulSet controller (Kubernetes >= 1.28)
           volumeMounts:
             - name: config
               mountPath: "/config"
diff --git a/kubernetes/linera-validator/templates/shards.yaml b/kubernetes/linera-validator/templates/shards.yaml
index e6b5e5fbdd21..932b7787a5a5 100644
--- a/kubernetes/linera-validator/templates/shards.yaml
+++ b/kubernetes/linera-validator/templates/shards.yaml
@@ -35,7 +35,16 @@ spec:
         - name: linera-server-initializer
           image: {{ .Values.lineraImage }}
           imagePullPolicy: {{ .Values.lineraImagePullPolicy }}
-          command: ["./server-init.sh", {{ .Values.storage | quote }}, {{ .Values.storageReplicationFactor | quote }}]
+          command:
+            - "./linera"
+            - "storage"
+            - "initialize"
+            - "--storage"
+            - {{ .Values.storage | quote }}
+            - "--genesis"
+            - "/config/genesis.json"
+            - "--storage-replication-factor"
+            - {{ .Values.storageReplicationFactor | quote }}
           env:
             - name: RUST_LOG
               value: {{ .Values.logLevel }}
@@ -53,10 +62,27 @@ spec:
         - name: linera-server
           image: {{ .Values.lineraImage }}
           imagePullPolicy: {{ .Values.lineraImagePullPolicy }}
-          command: ["./server-entrypoint.sh", {{ .Values.storage | quote }}, {{ .Values.storageReplicationFactor | quote }}]
+          command:
+            - "./linera-server"
+            - "run"
+            - "--storage"
+            - {{ .Values.storage | quote }}
+            - "--server"
+            - "/config/server.json"
+            - "--shard"
+            - "$(POD_ORDINAL)"
+            - "--storage-replication-factor"
+            - {{ .Values.storageReplicationFactor | quote }}
+            - "--memory-profiling"
+            - "--otel-exporter-otlp-endpoint"
+            - "http://tempo.tempo.svc.cluster.local:4317"
           env:
             - name: RUST_LOG
               value: {{ .Values.logLevel }}
+            - name: POD_ORDINAL
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.labels['apps.kubernetes.io/pod-index']  # ordinal label set by the StatefulSet controller (Kubernetes >= 1.28)
             {{- if .Values.serverTokioThreads }}
             - name: LINERA_SERVER_TOKIO_THREADS
               value: "{{ .Values.serverTokioThreads }}"
diff --git a/linera-base/Cargo.toml b/linera-base/Cargo.toml
index dcdd9cd97088..9d555652c844 100644
--- a/linera-base/Cargo.toml
+++ b/linera-base/Cargo.toml
@@ -18,12 +18,7 @@ workspace = true
 metrics = ["prometheus"]
 reqwest = ["dep:reqwest"]
 revm = []
-tempo = [
-    "opentelemetry",
-    "opentelemetry-otlp",
-    "opentelemetry_sdk",
-    "tracing-opentelemetry",
-]
+tempo = ["opentelemetry-otlp"]
 test = ["test-strategy", "proptest"]
 web = [
     "getrandom/js",
@@ -86,10 +81,11 @@ tracing-web = { optional = true, workspace = true }
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 chrono.workspace = true
-opentelemetry = { workspace = true, optional = true }
+opentelemetry.workspace = true
 opentelemetry-otlp = { workspace = true, optional = true }
-opentelemetry_sdk = { workspace = true, optional = true }
-tracing-opentelemetry = { workspace = true, optional = true }
+opentelemetry_sdk.workspace = true
+tracing-chrome.workspace = true
+tracing-opentelemetry.workspace = true
 rand = { workspace = true, features = ["getrandom", "std", "std_rng"] }
 tokio = { workspace = true, features = [
     "process",
@@ -107,6 +103,7 @@ assert_matches.workspace = true
 bcs.workspace = true
 linera-base = { path = ".", default-features = false, features = ["test"] }
 linera-witty = { workspace = true, features = ["test"] }
+tempfile.workspace = true
 test-case.workspace = true
 
 [build-dependencies]
diff --git a/linera-base/src/lib.rs b/linera-base/src/lib.rs
index 609127e96108..285cd33a570a 100644
--- a/linera-base/src/lib.rs
+++ b/linera-base/src/lib.rs
@@ -40,6 +40,8 @@ pub use task::Blocking;
 pub mod time;
 #[cfg_attr(web, path = "tracing_web.rs")]
 pub mod tracing;
+#[cfg(not(target_arch = "wasm32"))] +pub mod tracing_otel; #[cfg(test)] mod unit_tests; diff --git a/linera-base/src/tracing.rs b/linera-base/src/tracing.rs index 09811bb0f8eb..2235c3be4dde 100644 --- a/linera-base/src/tracing.rs +++ b/linera-base/src/tracing.rs @@ -12,8 +12,6 @@ use std::{ use is_terminal::IsTerminal as _; use tracing::Subscriber; -#[cfg(all(not(target_arch = "wasm32"), feature = "tempo"))] -use tracing_subscriber::filter::{filter_fn, FilterExt as _}; use tracing_subscriber::{ fmt::{ self, @@ -24,18 +22,52 @@ use tracing_subscriber::{ layer::{Layer, SubscriberExt as _}, registry::LookupSpan, util::SubscriberInitExt, + EnvFilter, }; -#[cfg(all(not(target_arch = "wasm32"), feature = "tempo"))] -use { - opentelemetry::{global, trace::TracerProvider}, - opentelemetry_otlp::{SpanExporter, WithExportConfig}, - opentelemetry_sdk::{ - trace::{self as sdktrace, SdkTracerProvider}, - Resource, - }, - tracing_opentelemetry::OpenTelemetryLayer, + +#[cfg(not(target_arch = "wasm32"))] +pub use crate::tracing_otel::{ + init_with_chrome_trace_exporter, init_with_opentelemetry, ChromeTraceGuard, }; +pub(crate) struct EnvConfig { + pub(crate) env_filter: EnvFilter, + span_events: FmtSpan, + format: Option, + color_output: bool, + log_name: String, +} + +impl EnvConfig { + pub(crate) fn stderr_layer(&self) -> Box + Send + Sync> + where + S: Subscriber + for<'span> LookupSpan<'span>, + { + prepare_formatted_layer( + self.format.as_deref(), + fmt::layer() + .with_span_events(self.span_events.clone()) + .with_writer(std::io::stderr) + .with_ansi(self.color_output), + ) + } + + pub(crate) fn maybe_log_file_layer(&self) -> Option + Send + Sync>> + where + S: Subscriber + for<'span> LookupSpan<'span>, + { + open_log_file(&self.log_name).map(|file_writer| { + prepare_formatted_layer( + self.format.as_deref(), + fmt::layer() + .with_span_events(self.span_events.clone()) + .with_writer(Arc::new(file_writer)) + .with_ansi(false), + ) + }) + } +} + /// Initializes tracing in 
a standard way. /// /// The environment variables `RUST_LOG`, `RUST_LOG_SPAN_EVENTS`, and `RUST_LOG_FORMAT` @@ -46,65 +78,19 @@ use { /// store log files. If it is set, a file named `log_name` with the `log` extension is /// created in the directory. pub fn init(log_name: &str) { - init_internal(log_name, false); + let config = get_env_config(log_name); + let maybe_log_file_layer = config.maybe_log_file_layer(); + let stderr_layer = config.stderr_layer(); + + tracing_subscriber::registry() + .with(config.env_filter) + .with(maybe_log_file_layer) + .with(stderr_layer) + .init(); } -/// Initializes tracing with full OpenTelemetry support. -/// -/// **IMPORTANT**: This function must be called from within a Tokio runtime context -/// as it initializes OpenTelemetry background tasks for span batching and export. -/// -/// This sets up complete tracing with OpenTelemetry integration, including the -/// OpenTelemetry layer in the subscriber to export spans to Tempo. -/// -/// ## Span Filtering for Performance -/// -/// By default, spans created by `#[instrument]` are logged to console AND sent -/// to OpenTelemetry. 
In order to not spam stderr, you can set a low level, and use the -/// `telemetry_only` target: -/// -/// ```rust -/// use tracing::{instrument, Level}; -/// -/// // Always sent to telemetry; console output controlled by level -/// #[instrument(level = "trace", target = "telemetry_only")] -/// fn my_called_too_frequently_function() { -/// // Will be sent to OpenTelemetry regardless of RUST_LOG level -/// // Will only appear in console if RUST_LOG includes trace level -/// } -/// -/// // Higher level - more likely to appear in console -/// #[instrument(level = "info", target = "telemetry_only")] -/// fn my_important_function() { -/// // Will be sent to OpenTelemetry regardless of RUST_LOG level -/// // Will appear in console if RUST_LOG includes info level or higher -/// } -/// ``` -/// -/// **Key behaviors:** -/// - If span level >= RUST_LOG level: span goes to BOTH telemetry AND console (regardless of target) -/// - If span level < RUST_LOG level AND target = "telemetry_only": span goes to telemetry ONLY -/// - If span level < RUST_LOG level AND target != "telemetry_only": span is filtered out completely -/// - Default level for `telemetry_only` should be `trace` for minimal console noise -/// - All explicit log calls (tracing::info!(), etc.) are always printed regardless of span filtering -pub async fn init_with_opentelemetry(log_name: &str) { - #[cfg(feature = "tempo")] - { - init_internal(log_name, true); - } - - #[cfg(not(feature = "tempo"))] - { - tracing::warn!( - "OpenTelemetry initialization requested but 'tempo' feature is not enabled. \ - Initializing standard tracing without OpenTelemetry support." 
- ); - init_internal(log_name, false); - } -} - -fn init_internal(log_name: &str, with_opentelemetry: bool) { - let env_filter = tracing_subscriber::EnvFilter::builder() +pub(crate) fn get_env_config(log_name: &str) -> EnvConfig { + let env_filter = EnvFilter::builder() .with_default_directive(tracing_subscriber::filter::LevelFilter::INFO.into()) .from_env_lossy(); @@ -116,87 +102,12 @@ fn init_internal(log_name: &str, with_opentelemetry: bool) { let color_output = !std::env::var("NO_COLOR").is_ok_and(|x| !x.is_empty()) && std::io::stderr().is_terminal(); - let stderr_layer = prepare_formatted_layer( - format.as_deref(), - fmt::layer() - .with_span_events(span_events.clone()) - .with_writer(std::io::stderr) - .with_ansi(color_output), - ); - - let maybe_log_file_layer = open_log_file(log_name).map(|file_writer| { - prepare_formatted_layer( - format.as_deref(), - fmt::layer() - .with_span_events(span_events) - .with_writer(Arc::new(file_writer)) - .with_ansi(false), - ) - }); - - #[cfg(any(target_arch = "wasm32", not(feature = "tempo")))] - { - let _ = with_opentelemetry; - tracing_subscriber::registry() - .with(env_filter) - .with(maybe_log_file_layer) - .with(stderr_layer) - .init(); - } - - #[cfg(all(not(target_arch = "wasm32"), feature = "tempo"))] - { - if with_opentelemetry { - // Initialize OpenTelemetry within async context - let otlp_endpoint = std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT") - .unwrap_or_else(|_| "http://tempo.tempo.svc.cluster.local:4317".to_string()); - - let exporter = SpanExporter::builder() - .with_tonic() - .with_endpoint(otlp_endpoint) - .build() - .expect("Failed to create OTLP exporter"); - - let resource = Resource::builder() - .with_service_name(log_name.to_string()) - .build(); - - let tracer_provider = SdkTracerProvider::builder() - .with_resource(resource) - .with_batch_exporter(exporter) - .with_sampler(sdktrace::Sampler::AlwaysOn) - .build(); - - // Set the global tracer provider - 
global::set_tracer_provider(tracer_provider.clone()); - - let tracer = tracer_provider.tracer("linera"); - - let telemetry_only_filter = - filter_fn(|metadata| metadata.is_span() && metadata.target() == "telemetry_only"); - - let otel_env_filter = tracing_subscriber::EnvFilter::builder() - .with_default_directive(tracing_subscriber::filter::LevelFilter::INFO.into()) - .from_env_lossy(); - - let opentelemetry_filter = otel_env_filter.or(telemetry_only_filter); - - let opentelemetry_layer = - OpenTelemetryLayer::new(tracer).with_filter(opentelemetry_filter); - - tracing_subscriber::registry() - .with(env_filter) - .with(maybe_log_file_layer) - .with(stderr_layer) - .with(opentelemetry_layer) - .init(); - } else { - tracing_subscriber::registry() - .with(env_filter) - .with(maybe_log_file_layer) - .with(stderr_layer) - .init(); - } + EnvConfig { + env_filter, + span_events, + format, + color_output, + log_name: log_name.to_string(), } } @@ -206,7 +117,7 @@ fn init_internal(log_name: &str, with_opentelemetry: bool) { /// and its name by the `log_name` parameter. /// /// Returns [`None`] if the `LINERA_LOG_DIR` environment variable is not set. -fn open_log_file(log_name: &str) -> Option { +pub(crate) fn open_log_file(log_name: &str) -> Option { let log_directory = env::var_os("LINERA_LOG_DIR")?; let mut log_file_path = Path::new(&log_directory).join(log_name); log_file_path.set_extension("log"); @@ -223,7 +134,7 @@ fn open_log_file(log_name: &str) -> Option { /// Applies a requested `formatting` to the log output of the provided `layer`. /// /// Returns a boxed [`Layer`] with the formatting applied to the original `layer`. 
-fn prepare_formatted_layer(
+pub(crate) fn prepare_formatted_layer(
     formatting: Option<&str>,
     layer: fmt::Layer, W>,
 ) -> Box + Send + Sync>
@@ -243,7 +154,7 @@ where
     }
 }
 
-fn fmt_span_from_str(events: &str) -> FmtSpan {
+pub(crate) fn fmt_span_from_str(events: &str) -> FmtSpan {
     let mut fmt_span = FmtSpan::NONE;
     for event in events.split(',') {
         fmt_span |= match event {
diff --git a/linera-base/src/tracing_otel.rs b/linera-base/src/tracing_otel.rs
new file mode 100644
index 000000000000..8e466fedb3c5
--- /dev/null
+++ b/linera-base/src/tracing_otel.rs
@@ -0,0 +1,114 @@
+// Copyright (c) Zefchain Labs, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+//! OpenTelemetry integration for tracing with OTLP export to Tempo and Chrome trace export.
+
+use tracing_chrome::ChromeLayerBuilder;
+use tracing_subscriber::{layer::SubscriberExt as _, util::SubscriberInitExt as _};
+#[cfg(feature = "tempo")]
+use {
+    opentelemetry::{global, trace::TracerProvider},
+    opentelemetry_otlp::{SpanExporter, WithExportConfig},
+    opentelemetry_sdk::{trace::SdkTracerProvider, Resource},
+    tracing_opentelemetry::OpenTelemetryLayer,
+    tracing_subscriber::{
+        filter::{filter_fn, FilterExt as _},
+        layer::Layer,
+    },
+};
+
+/// Initializes tracing with OpenTelemetry OTLP exporter to Tempo.
+///
+/// Exports traces to Tempo using the OTLP protocol. Requires the `tempo` feature.
+/// The `otlp_endpoint` parameter is required and must be provided by the caller.
+#[cfg(feature = "tempo")]
+pub fn init_with_opentelemetry(log_name: &str, otlp_endpoint: &str) {
+    let resource = Resource::builder()
+        .with_service_name(log_name.to_string())
+        .build();
+
+    let exporter = SpanExporter::builder()
+        .with_tonic()
+        .with_endpoint(otlp_endpoint)
+        .build()
+        .expect("Failed to create OTLP exporter");
+
+    let tracer_provider = SdkTracerProvider::builder()
+        .with_resource(resource)
+        .with_batch_exporter(exporter)
+        .with_sampler(opentelemetry_sdk::trace::Sampler::AlwaysOn)
+        .build();
+
+    global::set_tracer_provider(tracer_provider.clone());
+    let tracer = tracer_provider.tracer("linera");
+
+    let telemetry_only_filter =
+        filter_fn(|metadata| metadata.is_span() && metadata.target() == "telemetry_only");
+
+    let otel_env_filter = tracing_subscriber::EnvFilter::builder()
+        .with_default_directive(tracing_subscriber::filter::LevelFilter::INFO.into())
+        .from_env_lossy();
+
+    let opentelemetry_filter = otel_env_filter.or(telemetry_only_filter);
+    let opentelemetry_layer = OpenTelemetryLayer::new(tracer).with_filter(opentelemetry_filter);
+
+    let config = crate::tracing::get_env_config(log_name);
+    let maybe_log_file_layer = config.maybe_log_file_layer();
+    let stderr_layer = config.stderr_layer();
+
+    tracing_subscriber::registry()
+        .with(opentelemetry_layer)
+        .with(config.env_filter)
+        .with(maybe_log_file_layer)
+        .with(stderr_layer)
+        .init();
+}
+
+/// Fallback for builds without the `tempo` feature.
+///
+/// `crate::tracing` re-exports `init_with_opentelemetry` on every non-wasm target, so the
+/// symbol must exist even when OTLP support is compiled out. This variant installs the
+/// standard subscriber first, then warns that the requested endpoint is ignored.
+#[cfg(not(feature = "tempo"))]
+pub fn init_with_opentelemetry(log_name: &str, otlp_endpoint: &str) {
+    crate::tracing::init(log_name);
+    ::tracing::warn!(
+        otlp_endpoint,
+        "OpenTelemetry initialization requested but 'tempo' feature is not enabled. \
+         Initializing standard tracing without OpenTelemetry support."
+    );
+}
+
+/// Guard that flushes Chrome trace file when dropped.
+///
+/// Store this guard in a variable that lives for the duration of your program.
+/// When it's dropped, the trace file will be completed and closed.
+pub type ChromeTraceGuard = tracing_chrome::FlushGuard;
+
+/// Initializes tracing with Chrome Trace JSON exporter.
+///
+/// Returns a guard that must be kept alive for the duration of the program.
+/// When the guard is dropped, the trace data is flushed and completed.
+///
+/// Exports traces to Chrome Trace JSON format which can be visualized in:
+/// - Chrome: `chrome://tracing`
+/// - Perfetto UI: <https://ui.perfetto.dev>
+pub fn init_with_chrome_trace_exporter<W>(log_name: &str, writer: W) -> ChromeTraceGuard
+where
+    W: std::io::Write + Send + 'static,
+{
+    let (chrome_layer, guard) = ChromeLayerBuilder::new().writer(writer).build();
+
+    let config = crate::tracing::get_env_config(log_name);
+    let maybe_log_file_layer = config.maybe_log_file_layer();
+    let stderr_layer = config.stderr_layer();
+
+    tracing_subscriber::registry()
+        .with(chrome_layer)
+        .with(config.env_filter)
+        .with(maybe_log_file_layer)
+        .with(stderr_layer)
+        .init();
+
+    guard
+}
diff --git a/linera-client/src/client_options.rs b/linera-client/src/client_options.rs
index bbe2ce86ab41..562ee1dabb8f 100644
--- a/linera-client/src/client_options.rs
+++ b/linera-client/src/client_options.rs
@@ -103,6 +103,19 @@ pub struct ClientContextOptions {
     #[arg(long, default_value = "10")]
     pub max_retries: u32,
 
+    /// Enable OpenTelemetry Chrome JSON exporter for trace data analysis.
+    #[arg(long)]
+    pub chrome_trace_exporter: bool,
+
+    /// Output file path for Chrome trace JSON format.
+    /// Can be visualized in chrome://tracing or Perfetto UI.
+    #[arg(long, env = "LINERA_OTEL_TRACE_FILE")]
+    pub otel_trace_file: Option,
+
+    /// OpenTelemetry OTLP exporter endpoint (requires tempo feature).
+    #[arg(long, env = "LINERA_OTEL_EXPORTER_OTLP_ENDPOINT")]
+    pub otel_exporter_otlp_endpoint: Option,
+
     /// Whether to wait until a quorum of validators has confirmed that all sent cross-chain
     /// messages have been delivered.
#[arg(long)] diff --git a/linera-faucet/server/Cargo.toml b/linera-faucet/server/Cargo.toml index 40bd9f61cf74..c6a805f91bd8 100644 --- a/linera-faucet/server/Cargo.toml +++ b/linera-faucet/server/Cargo.toml @@ -19,6 +19,7 @@ ignored = ["prometheus"] [features] metrics = ["prometheus", "linera-base/metrics", "linera-client/metrics"] +memory-profiling = [] [build-dependencies] cfg_aliases.workspace = true diff --git a/linera-faucet/server/src/lib.rs b/linera-faucet/server/src/lib.rs index 60041a8342b5..f28d9001a26e 100644 --- a/linera-faucet/server/src/lib.rs +++ b/linera-faucet/server/src/lib.rs @@ -780,6 +780,8 @@ where pending_requests: Arc>>, request_notifier: Arc, max_batch_size: usize, + #[cfg(feature = "memory-profiling")] + memory_profiling: bool, } impl Clone for FaucetService @@ -806,6 +808,8 @@ where pending_requests: Arc::clone(&self.pending_requests), request_notifier: Arc::clone(&self.request_notifier), max_batch_size: self.max_batch_size, + #[cfg(feature = "memory-profiling")] + memory_profiling: self.memory_profiling, } } } @@ -821,6 +825,8 @@ pub struct FaucetConfig { pub chain_listener_config: ChainListenerConfig, pub storage_path: PathBuf, pub max_batch_size: usize, + #[cfg(feature = "memory-profiling")] + pub memory_profiling: bool, } impl FaucetService @@ -877,6 +883,8 @@ where pending_requests, request_notifier, max_batch_size: config.max_batch_size, + #[cfg(feature = "memory-profiling")] + memory_profiling: config.memory_profiling, }) } @@ -912,7 +920,12 @@ where let index_handler = axum::routing::get(graphiql).post(Self::index_handler); #[cfg(feature = "metrics")] - monitoring_server::start_metrics(self.metrics_address(), cancellation_token.clone()); + monitoring_server::start_metrics( + self.metrics_address(), + cancellation_token.clone(), + #[cfg(feature = "memory-profiling")] + self.memory_profiling, + ); let app = Router::new() .route("/", index_handler) diff --git a/linera-metrics/src/monitoring_server.rs 
b/linera-metrics/src/monitoring_server.rs index 123241ae4b9e..5af99128cc5e 100644 --- a/linera-metrics/src/monitoring_server.rs +++ b/linera-metrics/src/monitoring_server.rs @@ -14,25 +14,30 @@ use crate::memory_profiler::MemoryProfiler; pub fn start_metrics( address: impl ToSocketAddrs + Debug + Send + 'static, shutdown_signal: CancellationToken, + #[cfg(feature = "memory-profiling")] enable_profiling: bool, ) { #[cfg(feature = "memory-profiling")] let app = { - // Try to add memory profiling endpoint - match MemoryProfiler::check_prof_ctl() { - Ok(()) => { - info!("Memory profiling available, enabling /debug/pprof and /debug/flamegraph endpoints"); - Router::new() - .route("/metrics", get(serve_metrics)) - .route("/debug/pprof", get(MemoryProfiler::heap_profile)) - .route("/debug/flamegraph", get(MemoryProfiler::heap_flamegraph)) - } - Err(e) => { - tracing::warn!( - "Memory profiling not available: {}, serving metrics-only", - e - ); - Router::new().route("/metrics", get(serve_metrics)) + if enable_profiling { + // Try to add memory profiling endpoint + match MemoryProfiler::check_prof_ctl() { + Ok(()) => { + info!("Memory profiling available, enabling /debug/pprof and /debug/flamegraph endpoints"); + Router::new() + .route("/metrics", get(serve_metrics)) + .route("/debug/pprof", get(MemoryProfiler::heap_profile)) + .route("/debug/flamegraph", get(MemoryProfiler::heap_flamegraph)) + } + Err(e) => { + tracing::warn!( + "Memory profiling not available: {}, serving metrics-only", + e + ); + Router::new().route("/metrics", get(serve_metrics)) + } } + } else { + Router::new().route("/metrics", get(serve_metrics)) } }; diff --git a/linera-sdk/tests/fixtures/Cargo.lock b/linera-sdk/tests/fixtures/Cargo.lock index 536405487642..19e63f9d2511 100644 --- a/linera-sdk/tests/fixtures/Cargo.lock +++ b/linera-sdk/tests/fixtures/Cargo.lock @@ -172,7 +172,7 @@ dependencies = [ "serde_urlencoded", "static_assertions_next", "tempfile", - "thiserror", + "thiserror 1.0.69", ] 
[[package]] @@ -189,7 +189,7 @@ dependencies = [ "quote", "strum", "syn 2.0.106", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -417,7 +417,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85b6598a2f5d564fb7855dc6b06fd1c38cff5a72bd8b863a4d021938497b440a" dependencies = [ "serde", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -525,7 +525,7 @@ dependencies = [ "semver", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1694,7 +1694,7 @@ dependencies = [ "pest_derive", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2216,6 +2216,8 @@ dependencies = [ "is-terminal", "k256", "linera-witty", + "opentelemetry", + "opentelemetry_sdk", "port-selector", "prometheus", "proptest", @@ -2228,11 +2230,13 @@ dependencies = [ "serde_json", "serde_with", "test-strategy", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tokio-util", "tracing", + "tracing-chrome", + "tracing-opentelemetry", "tracing-subscriber", "trait-variant", "zstd", @@ -2256,7 +2260,7 @@ dependencies = [ "rand_distr", "serde", "serde_bytes", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -2290,7 +2294,7 @@ dependencies = [ "serde_json", "test-log", "test-strategy", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tonic", @@ -2330,7 +2334,7 @@ dependencies = [ "serde", "serde_bytes", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", "url", @@ -2364,7 +2368,7 @@ dependencies = [ "papaya", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "wit-bindgen 0.24.0", ] @@ -2413,7 +2417,7 @@ dependencies = [ "linera-views", "prost", "serde", - "thiserror", + "thiserror 1.0.69", "tokio", "tonic", "tonic-build", @@ -2437,7 +2441,7 @@ dependencies = [ "serde", "serde_json", "sha3", - "thiserror", + "thiserror 1.0.69", "tracing", ] @@ -2468,7 +2472,7 @@ dependencies = [ "sync_wrapper 1.0.2", "sysinfo", "tempfile", - "thiserror", + "thiserror 
1.0.69", "tokio", "tracing", "trait-variant", @@ -2516,7 +2520,7 @@ dependencies = [ "serde-wasm-bindgen", "shared-buffer", "target-lexicon", - "thiserror", + "thiserror 1.0.69", "tracing", "wasm-bindgen", "wasmer-derive", @@ -2549,7 +2553,7 @@ dependencies = [ "serde_bytes", "shared-buffer", "smallvec", - "thiserror", + "thiserror 1.0.69", "wasmer-types", "wasmparser 0.121.2", "windows-sys 0.59.0", @@ -2618,7 +2622,7 @@ dependencies = [ "region", "scopeguard", "serde", - "thiserror", + "thiserror 1.0.69", "wasmer-types", "windows-sys 0.59.0", ] @@ -2634,7 +2638,7 @@ dependencies = [ "linera-wasmer", "linera-witty-macros", "log", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2893,6 +2897,38 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea" +[[package]] +name = "opentelemetry" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaf416e4cb72756655126f7dd7bb0af49c674f4c1b9903e80c009e0c37e552e6" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.17", + "tracing", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f644aa9e5e31d11896e024305d7e3c98a88884d9f8919dbf37a9991bc47a4b" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand 0.9.2", + "serde_json", + "thiserror 2.0.17", + "tokio", + "tokio-stream", +] + [[package]] name = "papaya" version = "0.1.9" @@ -3161,7 +3197,7 @@ dependencies = [ "memchr", "parking_lot", "protobuf", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -3304,6 +3340,7 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ + 
"rand_chacha 0.9.0", "rand_core 0.9.3", "serde", ] @@ -3737,7 +3774,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b5b14ebbcc4e4f2b3642fa99c388649da58d1dc3308c7d109f39f565d1710f0" dependencies = [ "serde", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -4225,7 +4262,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", ] [[package]] @@ -4239,6 +4285,17 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "thread_local" version = "1.1.9" @@ -4563,6 +4620,17 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "tracing-chrome" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf0a738ed5d6450a9fb96e86a23ad808de2b727fd1394585da5cdd6788ffe724" +dependencies = [ + "serde_json", + "tracing-core", + "tracing-subscriber", +] + [[package]] name = "tracing-core" version = "0.1.34" @@ -4573,6 +4641,35 @@ dependencies = [ "valuable", ] +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-opentelemetry" +version = "0.31.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddcf5959f39507d0d04d6413119c04f33b623f4f951ebcbdddddfad2d0623a9c" +dependencies = [ + "js-sys", + "once_cell", + "opentelemetry", + "opentelemetry_sdk", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time", +] + [[package]] name = "tracing-serde" version = "0.2.0" @@ -4596,9 +4693,11 @@ dependencies = [ "serde", "serde_json", "sharded-slab", + "smallvec", "thread_local", "tracing", "tracing-core", + "tracing-log", "tracing-serde", ] @@ -4907,7 +5006,7 @@ dependencies = [ "serde_bytes", "sha2", "target-lexicon", - "thiserror", + "thiserror 1.0.69", "xxhash-rust", ] @@ -4943,6 +5042,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" version = "0.25.4" diff --git a/linera-service/Cargo.toml b/linera-service/Cargo.toml index 1c3d714209ab..3f2217420d6a 100644 --- a/linera-service/Cargo.toml +++ b/linera-service/Cargo.toml @@ -59,6 +59,7 @@ memory-profiling = [ "tikv-jemallocator/profiling", "linera-metrics/memory-profiling", ] +tempo = ["linera-base/tempo"] storage-service = ["linera-storage-service"] [dependencies] diff --git a/linera-service/src/cli/command.rs b/linera-service/src/cli/command.rs index e528272d6809..9d466a9c696e 100644 --- a/linera-service/src/cli/command.rs +++ b/linera-service/src/cli/command.rs @@ -1175,6 +1175,16 @@ pub enum NetCommand { #[cfg(feature = "kubernetes")] #[arg(long, default_value = "false")] dual_store: bool, + + /// Build Docker image with memory-profiling feature enabled. + #[cfg(feature = "kubernetes")] + #[arg(long, default_value = "false")] + with_memory_profiling: bool, + + /// Build Docker image with tempo feature enabled. 
+ #[cfg(feature = "kubernetes")] + #[arg(long, default_value = "false")] + with_tempo: bool, }, /// Print a bash helper script to make `linera net up` easier to use. The script is diff --git a/linera-service/src/cli/main.rs b/linera-service/src/cli/main.rs index 505077eb3a2e..c40803260564 100644 --- a/linera-service/src/cli/main.rs +++ b/linera-service/src/cli/main.rs @@ -923,6 +923,8 @@ impl Runnable for Job { monitoring_server::start_metrics( metrics_address, shutdown_notifier.clone(), + #[cfg(feature = "memory-profiling")] + false, ); } @@ -1184,6 +1186,8 @@ impl Runnable for Job { monitoring_server::start_metrics( metrics_address, shutdown_notifier.clone(), + #[cfg(feature = "memory-profiling")] + false, ); } @@ -1286,6 +1290,8 @@ impl Runnable for Job { metrics_port, default_chain, context, + #[cfg(feature = "memory-profiling")] + options.memory_profiling, ); let cancellation_token = CancellationToken::new(); tokio::spawn(listen_for_shutdown_signals(cancellation_token.clone())); @@ -1329,6 +1335,8 @@ impl Runnable for Job { chain_listener_config: config, storage_path, max_batch_size, + #[cfg(feature = "memory-profiling")] + memory_profiling: options.memory_profiling, }; let faucet = FaucetService::new(config, context, storage).await?; let cancellation_token = CancellationToken::new(); @@ -1814,6 +1822,16 @@ struct ClientOptions { #[arg(long, env = "LINERA_CLIENT_TOKIO_BLOCKING_THREADS")] tokio_blocking_threads: Option, + /// Enable memory profiling with jemalloc (requires memory-profiling feature). + #[cfg(feature = "memory-profiling")] + #[arg(long)] + memory_profiling: bool, + + /// OpenTelemetry OTLP exporter endpoint for Tempo (requires tempo feature). + #[cfg(feature = "tempo")] + #[arg(long, env = "LINERA_OTEL_EXPORTER_OTLP_ENDPOINT")] + otel_exporter_otlp_endpoint: Option, + /// Subcommand. 
#[command(subcommand)] command: ClientCommand, @@ -2068,17 +2086,53 @@ async fn kill_all_processes(pids: &[u32]) { } } -fn should_init_opentelemetry(command: &ClientCommand) -> bool { - matches!(command, ClientCommand::Faucet { .. }) -} +#[cfg(not(target_arch = "wasm32"))] +fn init_tracing( + options: &ClientOptions, +) -> anyhow::Result> { + if matches!(&options.command, ClientCommand::Faucet { .. }) { + #[cfg(feature = "tempo")] + if let Some(endpoint) = &options.otel_exporter_otlp_endpoint { + linera_base::tracing::init_with_opentelemetry( + &options.command.log_file_name(), + endpoint, + ); + } else { + linera_base::tracing::init(&options.command.log_file_name()); + } -fn main() -> anyhow::Result<()> { - let options = ClientOptions::init(); + #[cfg(not(feature = "tempo"))] + linera_base::tracing::init(&options.command.log_file_name()); - if !should_init_opentelemetry(&options.command) { + Ok(None) + } else if options.context_options.chrome_trace_exporter { + let trace_file_path = options + .context_options + .otel_trace_file + .as_deref() + .map_or_else( + || format!("{}.trace.json", options.command.log_file_name()), + |s| s.to_string(), + ); + let writer = std::fs::File::create(&trace_file_path)?; + Ok(Some(linera_base::tracing::init_with_chrome_trace_exporter( + &options.command.log_file_name(), + writer, + ))) + } else { linera_base::tracing::init(&options.command.log_file_name()); + Ok(None) } +} + +#[cfg(target_arch = "wasm32")] +fn init_tracing(options: &ClientOptions) { + linera_base::tracing::init(&options.command.log_file_name()); +} +fn main() -> anyhow::Result { + let options = ClientOptions::init(); + let _guard = init_tracing(&options)?; let mut runtime = if options.tokio_threads == Some(1) { tokio::runtime::Builder::new_current_thread() } else { @@ -2102,25 +2156,20 @@ fn main() -> anyhow::Result<()> { let result = runtime .enable_all() - .build() - .expect("Failed to create Tokio runtime") + .build()? 
.block_on(run(&options).instrument(span)); - let error_code = match result { - Ok(code) => code, + Ok(match result { + Ok(0) => process::ExitCode::SUCCESS, + Ok(code) => process::ExitCode::from(code as u8), Err(msg) => { error!("Error is {:?}", msg); - 2 + process::ExitCode::FAILURE } - }; - process::exit(error_code); + }) } async fn run(options: &ClientOptions) -> Result { - if should_init_opentelemetry(&options.command) { - linera_base::tracing::init_with_opentelemetry(&options.command.log_file_name()).await; - } - match &options.command { ClientCommand::HelpMarkdown => { clap_markdown::print_help_markdown::(); diff --git a/linera-service/src/exporter/main.rs b/linera-service/src/exporter/main.rs index e0ff4006d965..78576df3244d 100644 --- a/linera-service/src/exporter/main.rs +++ b/linera-service/src/exporter/main.rs @@ -104,7 +104,12 @@ impl Runnable for ExporterContext { tokio::spawn(listen_for_shutdown_signals(shutdown_notifier.clone())); #[cfg(with_metrics)] - monitoring_server::start_metrics(self.config.metrics_address(), shutdown_notifier.clone()); + monitoring_server::start_metrics( + self.config.metrics_address(), + shutdown_notifier.clone(), + #[cfg(feature = "memory-profiling")] + false, + ); let (sender, handle) = start_block_processor_task( storage, diff --git a/linera-service/src/node_service.rs b/linera-service/src/node_service.rs index 643371f6ec30..c2ec5af59711 100644 --- a/linera-service/src/node_service.rs +++ b/linera-service/src/node_service.rs @@ -801,6 +801,8 @@ where metrics_port: NonZeroU16, default_chain: Option, context: Arc>, + #[cfg(feature = "memory-profiling")] + memory_profiling: bool, } impl Clone for NodeService @@ -815,6 +817,8 @@ where metrics_port: self.metrics_port, default_chain: self.default_chain, context: Arc::clone(&self.context), + #[cfg(feature = "memory-profiling")] + memory_profiling: self.memory_profiling, } } } @@ -830,6 +834,7 @@ where #[cfg(with_metrics)] metrics_port: NonZeroU16, default_chain: Option, context: 
C, + #[cfg(feature = "memory-profiling")] memory_profiling: bool, ) -> Self { Self { config, @@ -838,6 +843,8 @@ where metrics_port, default_chain, context: Arc::new(Mutex::new(context)), + #[cfg(feature = "memory-profiling")] + memory_profiling, } } @@ -876,7 +883,12 @@ where axum::routing::get(util::graphiql).post(Self::application_handler); #[cfg(with_metrics)] - monitoring_server::start_metrics(self.metrics_address(), cancellation_token.clone()); + monitoring_server::start_metrics( + self.metrics_address(), + cancellation_token.clone(), + #[cfg(feature = "memory-profiling")] + self.memory_profiling, + ); let app = Router::new() .route("/", index_handler) diff --git a/linera-service/src/proxy/grpc.rs b/linera-service/src/proxy/grpc.rs index a01e1a88aee4..d251839d1145 100644 --- a/linera-service/src/proxy/grpc.rs +++ b/linera-service/src/proxy/grpc.rs @@ -153,6 +153,8 @@ struct GrpcProxyInner { tls: TlsConfig, storage: S, id: usize, + #[cfg(feature = "memory-profiling")] + memory_profiling: bool, } impl GrpcProxy @@ -166,6 +168,7 @@ where tls: TlsConfig, storage: S, id: usize, + #[cfg(feature = "memory-profiling")] memory_profiling: bool, ) -> Self { Self(Arc::new(GrpcProxyInner { internal_config, @@ -176,6 +179,8 @@ where tls, storage, id, + #[cfg(feature = "memory-profiling")] + memory_profiling, })) } @@ -248,7 +253,12 @@ where let mut join_set = JoinSet::new(); #[cfg(with_metrics)] - monitoring_server::start_metrics(self.metrics_address(), shutdown_signal.clone()); + monitoring_server::start_metrics( + self.metrics_address(), + shutdown_signal.clone(), + #[cfg(feature = "memory-profiling")] + self.0.memory_profiling, + ); let (mut health_reporter, health_service) = tonic_health::server::health_reporter(); health_reporter diff --git a/linera-service/src/proxy/main.rs b/linera-service/src/proxy/main.rs index 0c0a54148068..a60fb09da4ac 100644 --- a/linera-service/src/proxy/main.rs +++ b/linera-service/src/proxy/main.rs @@ -96,6 +96,16 @@ pub struct ProxyOptions 
{ /// Runs a specific proxy instance. #[arg(long)] id: Option, + + /// Enable memory profiling with jemalloc (requires memory-profiling feature). + #[cfg(feature = "memory-profiling")] + #[arg(long)] + memory_profiling: bool, + + /// OpenTelemetry OTLP exporter endpoint for Tempo (requires tempo feature). + #[cfg(feature = "tempo")] + #[arg(long, env = "LINERA_OTEL_EXPORTER_OTLP_ENDPOINT")] + otel_exporter_otlp_endpoint: Option, } /// A Linera Proxy, either gRPC or over 'Simple Transport', meaning TCP or UDP. @@ -114,6 +124,8 @@ struct ProxyContext { send_timeout: Duration, recv_timeout: Duration, id: usize, + #[cfg(feature = "memory-profiling")] + memory_profiling: bool, } impl ProxyContext { @@ -124,6 +136,8 @@ impl ProxyContext { send_timeout: options.send_timeout, recv_timeout: options.recv_timeout, id: options.id.unwrap_or(0), + #[cfg(feature = "memory-profiling")] + memory_profiling: options.memory_profiling, }) } } @@ -163,6 +177,8 @@ where tls, storage, context.id, + #[cfg(feature = "memory-profiling")] + context.memory_profiling, )) } ( @@ -182,6 +198,8 @@ where recv_timeout: context.recv_timeout, storage, id: context.id, + #[cfg(feature = "memory-profiling")] + memory_profiling: context.memory_profiling, })), _ => { bail!( @@ -207,6 +225,8 @@ where recv_timeout: Duration, storage: S, id: usize, + #[cfg(feature = "memory-profiling")] + memory_profiling: bool, } #[async_trait] @@ -265,7 +285,12 @@ where let address = self.get_listen_address(); #[cfg(with_metrics)] - monitoring_server::start_metrics(address, shutdown_signal.clone()); + monitoring_server::start_metrics( + address, + shutdown_signal.clone(), + #[cfg(feature = "memory-profiling")] + self.memory_profiling, + ); self.public_config .protocol @@ -484,8 +509,17 @@ impl ProxyOptions { let server_config: ValidatorServerConfig = util::read_json(&self.config_path).expect("Fail to read server config"); let public_key = &server_config.validator.public_key; - 
linera_base::tracing::init_with_opentelemetry(&format!("validator-{public_key}-proxy")) - .await; + let log_name = format!("validator-{public_key}-proxy"); + + #[cfg(feature = "tempo")] + if let Some(endpoint) = &self.otel_exporter_otlp_endpoint { + linera_base::tracing::init_with_opentelemetry(&log_name, endpoint); + } else { + linera_base::tracing::init(&log_name); + } + + #[cfg(not(feature = "tempo"))] + linera_base::tracing::init(&log_name); let store_config = self .storage_config diff --git a/linera-service/src/schema_export.rs b/linera-service/src/schema_export.rs index 5bcf3e964728..6962c55f4ea8 100644 --- a/linera-service/src/schema_export.rs +++ b/linera-service/src/schema_export.rs @@ -231,6 +231,8 @@ async fn main() -> std::io::Result<()> { std::num::NonZeroU16::new(8081).unwrap(), None, DummyContext, + #[cfg(feature = "memory-profiling")] + false, ); let schema = service.schema().sdl(); print!("{}", schema); diff --git a/linera-service/src/server.rs b/linera-service/src/server.rs index 9e517d7387f6..8780860709fc 100644 --- a/linera-service/src/server.rs +++ b/linera-service/src/server.rs @@ -71,6 +71,8 @@ struct ServerContext { grace_period: Duration, chain_worker_ttl: Duration, chain_info_max_received_log_entries: usize, + #[cfg(feature = "memory-profiling")] + memory_profiling: bool, } impl ServerContext { @@ -130,6 +132,8 @@ impl ServerContext { monitoring_server::start_metrics( (listen_address.clone(), port), shutdown_signal.clone(), + #[cfg(feature = "memory-profiling")] + self.memory_profiling, ); } @@ -176,6 +180,8 @@ impl ServerContext { monitoring_server::start_metrics( (listen_address.to_string(), port), shutdown_signal.clone(), + #[cfg(feature = "memory-profiling")] + self.memory_profiling, ); } @@ -274,6 +280,16 @@ struct ServerOptions { /// The number of Tokio blocking threads to use. 
#[arg(long, env = "LINERA_SERVER_TOKIO_BLOCKING_THREADS")] tokio_blocking_threads: Option, + + /// Enable memory profiling with jemalloc (requires memory-profiling feature). + #[cfg(feature = "memory-profiling")] + #[arg(long)] + memory_profiling: bool, + + /// OpenTelemetry OTLP exporter endpoint for Tempo (requires tempo feature). + #[cfg(feature = "tempo")] + #[arg(long, env = "LINERA_OTEL_EXPORTER_OTLP_ENDPOINT")] + otel_exporter_otlp_endpoint: Option, } #[derive(Debug, PartialEq, Eq, Deserialize)] @@ -492,7 +508,17 @@ fn log_file_name_for(command: &ServerCommand) -> Cow<'static, str> { } async fn run(options: ServerOptions) { - linera_base::tracing::init_with_opentelemetry(&log_file_name_for(&options.command)).await; + let log_name = log_file_name_for(&options.command); + + #[cfg(feature = "tempo")] + if let Some(endpoint) = &options.otel_exporter_otlp_endpoint { + linera_base::tracing::init_with_opentelemetry(&log_name, endpoint); + } else { + linera_base::tracing::init(&log_name); + } + + #[cfg(not(feature = "tempo"))] + linera_base::tracing::init(&log_name); match options.command { ServerCommand::Run { @@ -520,6 +546,8 @@ async fn run(options: ServerOptions) { grace_period, chain_worker_ttl, chain_info_max_received_log_entries, + #[cfg(feature = "memory-profiling")] + memory_profiling: options.memory_profiling, }; let wasm_runtime = wasm_runtime.with_wasm_default(); let store_config = storage_config diff --git a/linera-web/src/lib.rs b/linera-web/src/lib.rs index 9dc2f76c5e98..6a13ae67f262 100644 --- a/linera-web/src/lib.rs +++ b/linera-web/src/lib.rs @@ -97,6 +97,9 @@ pub const OPTIONS: ClientContextOptions = ClientContextOptions { wallet_state_path: None, keystore_path: None, with_wallet: None, + chrome_trace_exporter: false, + otel_trace_file: None, + otel_exporter_otlp_endpoint: None, }; #[wasm_bindgen(js_name = Faucet)]