Skip to content

Commit bf1e79f

Browse files
authored
better JD health-checks (#2206)
1 parent 3780857 commit bf1e79f

File tree

11 files changed

+239
-15
lines changed

11 files changed

+239
-15
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
name: Framework Golden Private Tests Examples
# Groups tests that require access to private container registries
on:
  push:

jobs:
  test:
    defaults:
      run:
        working-directory: framework/examples/myproject
    env:
      # Private Job Distributor image pulled from the prod ECR registry.
      CTF_JD_IMAGE: "${{secrets.AWS_ACCOUNT_ID_PROD}}.dkr.ecr.us-west-2.amazonaws.com/job-distributor:0.22.1"
    runs-on: ubuntu-latest
    permissions:
      # id-token: write is required to request the OIDC token used for AWS auth.
      id-token: write
      contents: read
    strategy:
      fail-fast: false
      matrix:
        test:
          - name: TestPrivate
            config: jd.toml
            count: 1
            timeout: 10m
          # TODO: sdlc auth
          # - name: TestDockerFakes
          #   config: fake_docker.toml
          #   count: 1
          #   timeout: 10m
    name: ${{ matrix.test.name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
      - name: Configure AWS credentials using OIDC
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
        with:
          role-to-assume: ${{ secrets.AWS_CTF_READ_ACCESS_ROLE_ARN }}
          aws-region: us-west-2
      - name: Login to Amazon ECR
        id: login-ecr-private
        uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
        with:
          registries: ${{ format('{0},{1}', secrets.AWS_ACCOUNT_ID_SDLC, secrets.AWS_ACCOUNT_ID_PROD) }}
        env:
          AWS_REGION: us-west-2
      - name: Check for changes in Framework
        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
        id: changes
        with:
          filters: |
            src:
              - 'framework/**'
              - '.github/workflows/framework-golden-tests.yml'
              # Also re-run when this workflow itself changes.
              # NOTE(review): assumes this file is framework-golden-tests-private.yml - confirm the path.
              - '.github/workflows/framework-golden-tests-private.yml'
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.24.0'
      - name: Cache Go modules
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/go-build
            ~/go/pkg/mod
          key: go-modules-${{ hashFiles('framework/examples/myproject/go.sum') }}-${{ runner.os }}-framework-golden-examples
          restore-keys: |
            go-modules-${{ runner.os }}-framework-golden-examples
            go-modules-${{ runner.os }}
      - name: Install dependencies
        run: go mod download
      - name: Run Tests
        # Only run the (slow, registry-dependent) tests when relevant files changed.
        if: steps.changes.outputs.src == 'true'
        env:
          CTF_CONFIGS: ${{ matrix.test.config }}
        run: |
          go test -timeout ${{ matrix.test.timeout }} -v -count ${{ matrix.test.count }} -run ${{ matrix.test.name }}
      - name: Upload Logs
        # Upload container logs even when the tests fail.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: container-logs-${{ matrix.test.name }}
          path: framework/examples/myproject/logs
          retention-days: 1

.github/workflows/framework-golden-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ jobs:
7575
config: fake.toml
7676
count: 1
7777
timeout: 10m
78-
# TODO: sdlc auth
78+
# TODO: sdlc auth (move to framework-golden-tests-private.yml, which has that auth set up)
7979
# - name: TestDockerFakes
8080
# config: fake_docker.toml
8181
# count: 1

framework/.changeset/v0.11.3.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
- Bump the Aptos node image to v1.36.6
1+
- Bump the Aptos node image to v1.36.6

framework/.changeset/v0.11.5.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
- Enhance JD health checks and add a CI test for them
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
package jd
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"time"
7+
8+
"github.com/docker/go-connections/nat"
9+
tc "github.com/testcontainers/testcontainers-go"
10+
tcwait "github.com/testcontainers/testcontainers-go/wait"
11+
"google.golang.org/grpc"
12+
"google.golang.org/grpc/credentials"
13+
"google.golang.org/grpc/credentials/insecure"
14+
"google.golang.org/grpc/health/grpc_health_v1"
15+
16+
"github.com/smartcontractkit/chainlink-testing-framework/framework"
17+
)
18+
19+
// GRPCHealthStrategy implements a wait strategy for gRPC health checks
20+
type GRPCHealthStrategy struct {
21+
Port nat.Port
22+
PollInterval time.Duration
23+
timeout time.Duration
24+
}
25+
26+
// NewGRPCHealthStrategy creates a new gRPC health check wait strategy
27+
func NewGRPCHealthStrategy(port nat.Port) *GRPCHealthStrategy {
28+
return &GRPCHealthStrategy{
29+
Port: port,
30+
PollInterval: 200 * time.Millisecond,
31+
timeout: 3 * time.Minute,
32+
}
33+
}
34+
35+
// WithTimeout sets the timeout for the gRPC health check strategy
36+
func (g *GRPCHealthStrategy) WithTimeout(timeout time.Duration) *GRPCHealthStrategy {
37+
g.timeout = timeout
38+
return g
39+
}
40+
41+
// WithPollInterval sets the poll interval for the gRPC health check strategy
42+
func (g *GRPCHealthStrategy) WithPollInterval(interval time.Duration) *GRPCHealthStrategy {
43+
g.PollInterval = interval
44+
return g
45+
}
46+
47+
// WaitUntilReady implements Strategy.WaitUntilReady
48+
func (g *GRPCHealthStrategy) WaitUntilReady(ctx context.Context, target tcwait.StrategyTarget) error {
49+
ctx, cancel := context.WithTimeout(ctx, g.timeout)
50+
defer cancel()
51+
52+
for {
53+
select {
54+
case <-ctx.Done():
55+
return ctx.Err()
56+
case <-time.After(g.PollInterval):
57+
// Check if container is still running
58+
state, err := target.State(ctx)
59+
if err != nil {
60+
return err
61+
}
62+
if !state.Running {
63+
return fmt.Errorf("container is not running: %s", state.Status)
64+
}
65+
66+
// Get host and port
67+
host, err := framework.GetHost(target.(tc.Container)) //nolint:contextcheck //don't want modify the signature of GetHost() yet
68+
if err != nil {
69+
continue
70+
}
71+
72+
mappedPort, err := target.MappedPort(ctx, g.Port)
73+
if err != nil {
74+
continue
75+
}
76+
77+
// Attempt gRPC health check
78+
address := fmt.Sprintf("%s:%s", host, mappedPort.Port())
79+
if err := g.checkHealth(ctx, address); err == nil {
80+
return nil
81+
}
82+
}
83+
}
84+
}
85+
86+
// checkHealth performs the actual gRPC health check
87+
func (g *GRPCHealthStrategy) checkHealth(ctx context.Context, address string) error {
88+
// Create a short timeout for the individual check
89+
checkCtx, cancel := context.WithTimeout(ctx, 1*time.Second)
90+
defer cancel()
91+
92+
// Use plaintext/insecure connection (standard for local testing and health checks)
93+
return g.tryHealthCheck(checkCtx, address, insecure.NewCredentials())
94+
}
95+
96+
// tryHealthCheck attempts a health check with specific credentials
97+
func (g *GRPCHealthStrategy) tryHealthCheck(ctx context.Context, address string, creds credentials.TransportCredentials) error {
98+
// Build dial options similar to the working JD connection code
99+
opts := []grpc.DialOption{
100+
grpc.WithTransportCredentials(creds),
101+
}
102+
103+
// Create the gRPC client connection
104+
conn, err := grpc.NewClient(address, opts...)
105+
if err != nil {
106+
return err
107+
}
108+
defer func() { _ = conn.Close() }()
109+
110+
// Create health check client
111+
healthClient := grpc_health_v1.NewHealthClient(conn)
112+
113+
// Perform health check
114+
resp, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{})
115+
if err != nil {
116+
return err
117+
}
118+
119+
if resp.Status != grpc_health_v1.HealthCheckResponse_SERVING {
120+
return fmt.Errorf("service not serving, status: %v", resp.Status)
121+
}
122+
123+
return nil
124+
}

framework/components/jd/jd.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ import (
44
"context"
55
"fmt"
66
"os"
7+
"time"
78

89
"github.com/docker/docker/api/types/container"
910
"github.com/docker/go-connections/nat"
1011
tc "github.com/testcontainers/testcontainers-go"
12+
"github.com/testcontainers/testcontainers-go/wait"
1113
tcwait "github.com/testcontainers/testcontainers-go/wait"
1214

1315
"github.com/smartcontractkit/chainlink-testing-framework/framework"
@@ -19,6 +21,7 @@ const (
1921
GRPCPort string = "14231"
2022
CSAEncryptionKey string = "!PASsword000!"
2123
WSRPCPort string = "8080"
24+
WSRPCHealthPort string = "8081"
2225
)
2326

2427
type Input struct {
@@ -75,6 +78,9 @@ func NewJD(in *Input) (*Output, error) {
7578
if jdImg != "" {
7679
in.Image = jdImg
7780
}
81+
if in.WSRPCPort == WSRPCHealthPort {
82+
return nil, fmt.Errorf("wsrpc port cannot be the same as wsrpc health port")
83+
}
7884
if in.DBInput == nil {
7985
in.DBInput = defaultJDDB()
8086
}
@@ -84,7 +90,8 @@ func NewJD(in *Input) (*Output, error) {
8490
return nil, err
8591
}
8692
containerName := framework.DefaultTCName("jd")
87-
bindPort := fmt.Sprintf("%s/tcp", in.GRPCPort)
93+
grpcPort := fmt.Sprintf("%s/tcp", in.GRPCPort)
94+
wsHealthPort := fmt.Sprintf("%s/tcp", WSRPCHealthPort)
8895
req := tc.ContainerRequest{
8996
Name: containerName,
9097
Image: in.Image,
@@ -93,11 +100,11 @@ func NewJD(in *Input) (*Output, error) {
93100
NetworkAliases: map[string][]string{
94101
framework.DefaultNetworkName: {containerName},
95102
},
96-
ExposedPorts: []string{bindPort},
103+
ExposedPorts: []string{grpcPort, wsHealthPort},
97104
HostConfigModifier: func(h *container.HostConfig) {
98105
// JobDistributor service is isolated from internet by default!
99106
framework.NoDNS(true, h)
100-
h.PortBindings = framework.MapTheSamePort(bindPort)
107+
h.PortBindings = framework.MapTheSamePort(grpcPort)
101108
},
102109
Env: map[string]string{
103110
"DATABASE_URL": pgOut.JDInternalURL,
@@ -107,6 +114,13 @@ func NewJD(in *Input) (*Output, error) {
107114
},
108115
WaitingFor: tcwait.ForAll(
109116
tcwait.ForListeningPort(nat.Port(fmt.Sprintf("%s/tcp", in.GRPCPort))),
117+
wait.ForHTTP("/healthz").
118+
WithPort(nat.Port(fmt.Sprintf("%s/tcp", WSRPCHealthPort))). // WSRPC health endpoint uses different port than WSRPC
119+
WithStartupTimeout(1*time.Minute).
120+
WithPollInterval(200*time.Millisecond),
121+
NewGRPCHealthStrategy(nat.Port(fmt.Sprintf("%s/tcp", in.GRPCPort))).
122+
WithTimeout(1*time.Minute).
123+
WithPollInterval(200*time.Millisecond),
110124
),
111125
}
112126
if req.Image == "" {
Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,20 @@
1-
package jd_test
1+
package examples
22

33
import (
44
"os"
55
"testing"
66

7-
"github.com/stretchr/testify/require"
8-
97
"github.com/smartcontractkit/chainlink-testing-framework/framework"
108
"github.com/smartcontractkit/chainlink-testing-framework/framework/components/jd"
9+
"github.com/stretchr/testify/require"
1110
)
1211

13-
// here we only test that we can boot up JD
14-
// client examples are under "examples" dir
15-
// since JD is private this env var should be set locally and in CI
16-
// TODO: add ComponentDocker prefix to turn this on when we'll have access to ECRs
17-
func TestJD(t *testing.T) {
12+
func TestPrivateJd(t *testing.T) {
1813
err := framework.DefaultNetwork(nil)
1914
require.NoError(t, err)
2015
_, err = jd.NewJD(&jd.Input{
21-
Image: os.Getenv("CTF_JD_IMAGE"),
16+
Image: os.Getenv("CTF_JD_IMAGE"),
17+
CSAEncryptionKey: "d1093c0060d50a3c89c189b2e485da5a3ce57f3dcb38ab7e2c0d5f0bb2314a44", // random key for tests
2218
})
2319
require.NoError(t, err)
2420
}

framework/examples/myproject_cll/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ require (
119119
golang.org/x/text v0.26.0 // indirect
120120
golang.org/x/time v0.7.0 // indirect
121121
google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a // indirect
122+
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect
122123
google.golang.org/grpc v1.72.2 // indirect
123124
google.golang.org/protobuf v1.36.6 // indirect
124125
gopkg.in/guregu/null.v4 v4.0.0 // indirect

framework/examples/myproject_cll/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,8 @@ go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/Wgbsd
334334
go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
335335
go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI=
336336
go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg=
337+
go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk=
338+
go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w=
337339
go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
338340
go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
339341
go.opentelemetry.io/proto/otlp v1.7.0 h1:jX1VolD6nHuFzOYso2E73H85i92Mv8JQYk0K9vz09os=

framework/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ require (
3131
go.opentelemetry.io/otel/trace v1.35.0
3232
go.uber.org/multierr v1.11.0
3333
golang.org/x/sync v0.13.0
34+
google.golang.org/grpc v1.71.0
3435
gopkg.in/guregu/null.v4 v4.0.0
3536
)
3637

@@ -123,7 +124,6 @@ require (
123124
golang.org/x/text v0.24.0 // indirect
124125
google.golang.org/genproto/googleapis/api v0.0.0-20250106144421-5f5ef82da422 // indirect
125126
google.golang.org/genproto/googleapis/rpc v0.0.0-20250115164207-1a7da9e5054f // indirect
126-
google.golang.org/grpc v1.71.0 // indirect
127127
google.golang.org/protobuf v1.36.4 // indirect
128128
gopkg.in/ini.v1 v1.67.0 // indirect
129129
gopkg.in/yaml.v3 v3.0.1 // indirect

0 commit comments

Comments
 (0)