Skip to content

Commit 5176573

Browse files
authored
Upgrade to use Gateway Inference Extension 1.1.0 rc.1 (#384)
* Upgrade to GIE 1.1.0-rc.1 Signed-off-by: Shmuel Kallner <[email protected]> * API changes due to upgrade to GIE 1.1.0-rc.1 Signed-off-by: Shmuel Kallner <[email protected]> * Test changes due to upgrade to GIE 1.1.0-rc.1 Signed-off-by: Shmuel Kallner <[email protected]> * E2E tests now exploit public test APIs in GIE Signed-off-by: Shmuel Kallner <[email protected]> --------- Signed-off-by: Shmuel Kallner <[email protected]>
1 parent 6e2bb8a commit 5176573

File tree

17 files changed

+150
-274
lines changed

17 files changed

+150
-274
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ test-integration: download-tokenizer install-dependencies ## Run integration tes
9999
go test -ldflags="$(LDFLAGS)" -v -tags=integration_tests ./test/integration/
100100

101101
.PHONY: test-e2e
102-
test-e2e: image-build ## Run end-to-end tests against a new kind cluster
102+
test-e2e: image-build sidecar-image-build ## Run end-to-end tests against a new kind cluster
103103
@printf "\033[33;1m==== Running End to End Tests ====\033[0m\n"
104104
./test/scripts/run_e2e.sh
105105

go.mod

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ require (
2222
k8s.io/client-go v0.34.1
2323
sigs.k8s.io/controller-runtime v0.22.3
2424
sigs.k8s.io/gateway-api v1.4.0
25-
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251016181044-831a919943ba
25+
sigs.k8s.io/gateway-api-inference-extension v1.1.0-rc.1
2626
)
2727

2828
require (
@@ -57,9 +57,9 @@ require (
5757
github.com/google/btree v1.1.3 // indirect
5858
github.com/google/cel-go v0.26.0 // indirect
5959
github.com/google/gnostic-models v0.7.0 // indirect
60-
github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 // indirect
60+
github.com/google/pprof v0.0.0-20250923004556-9e5a51aed1e8 // indirect
6161
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect
62-
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
62+
github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 // indirect
6363
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
6464
github.com/inconshreveable/mousetrap v1.1.0 // indirect
6565
github.com/josharian/intern v1.0.0 // indirect
@@ -77,7 +77,7 @@ require (
7777
github.com/prometheus/client_model v0.6.2 // indirect
7878
github.com/prometheus/common v0.67.1 // indirect
7979
github.com/prometheus/procfs v0.17.0 // indirect
80-
github.com/prometheus/prometheus v0.306.0 // indirect
80+
github.com/prometheus/prometheus v0.307.1 // indirect
8181
github.com/redis/go-redis/v9 v9.11.0 // indirect
8282
github.com/spf13/cobra v1.9.1 // indirect
8383
github.com/spf13/pflag v1.0.7 // indirect
@@ -90,7 +90,7 @@ require (
9090
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
9191
github.com/x448/float16 v0.8.4 // indirect
9292
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
93-
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
93+
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
9494
go.opentelemetry.io/otel v1.38.0 // indirect
9595
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
9696
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect
@@ -105,19 +105,19 @@ require (
105105
go.uber.org/zap v1.27.0 // indirect
106106
go.yaml.in/yaml/v2 v2.4.3 // indirect
107107
go.yaml.in/yaml/v3 v3.0.4 // indirect
108-
golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 // indirect
108+
golang.org/x/exp v0.0.0-20250808145144-a408d31f581a // indirect
109109
golang.org/x/mod v0.28.0 // indirect
110110
golang.org/x/net v0.44.0 // indirect
111111
golang.org/x/oauth2 v0.31.0 // indirect
112112
golang.org/x/sync v0.17.0 // indirect
113113
golang.org/x/sys v0.36.0 // indirect
114114
golang.org/x/term v0.35.0 // indirect
115115
golang.org/x/text v0.29.0 // indirect
116-
golang.org/x/time v0.12.0 // indirect
116+
golang.org/x/time v0.13.0 // indirect
117117
golang.org/x/tools v0.37.0 // indirect
118118
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
119-
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
120-
google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 // indirect
119+
google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect
120+
google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect
121121
google.golang.org/protobuf v1.36.10 // indirect
122122
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
123123
gopkg.in/inf.v0 v0.9.1 // indirect

go.sum

Lines changed: 64 additions & 64 deletions
Large diffs are not rendered by default.

pkg/plugins/pre-request/pd_prerequest.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"encoding/json"
77
"fmt"
88
"net"
9-
"strconv"
109

1110
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
1211
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
@@ -68,7 +67,7 @@ func (p *PrefillHeaderHandler) WithName(name string) *PrefillHeaderHandler {
6867
}
6968

7069
// PreRequest wires prefill SchedulerProfile result into a header to indicate prefill worker
71-
func (p *PrefillHeaderHandler) PreRequest(_ context.Context, request *types.LLMRequest, schedulingResult *types.SchedulingResult, targetPort int) {
70+
func (p *PrefillHeaderHandler) PreRequest(_ context.Context, request *types.LLMRequest, schedulingResult *types.SchedulingResult) {
7271
if _, found := request.Headers[common.PrefillPodHeader]; found {
7372
request.Headers[common.PrefillPodHeader] = "" // clear header, if already set
7473
}
@@ -78,6 +77,7 @@ func (p *PrefillHeaderHandler) PreRequest(_ context.Context, request *types.LLMR
7877
return // prefill profile failed to run or we chose not to run it, no-op in this case
7978
}
8079

81-
prefillHostPort := net.JoinHostPort(prefillProfileRunResult.TargetPods[0].GetPod().Address, strconv.Itoa(targetPort))
80+
targetPod := prefillProfileRunResult.TargetPods[0].GetPod()
81+
prefillHostPort := net.JoinHostPort(targetPod.Address, targetPod.Port)
8282
request.Headers[common.PrefillPodHeader] = prefillHostPort // in the form of <ip:port>
8383
}

pkg/plugins/scorer/active_request.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ func (s *ActiveRequest) Score(ctx context.Context, _ *types.CycleState, _ *types
167167
// It creates a new request entry in the cache with its own TTL and
168168
// increments the pod count for fast lookup.
169169
func (s *ActiveRequest) PreRequest(ctx context.Context, request *types.LLMRequest,
170-
schedulingResult *types.SchedulingResult, _ int) {
170+
schedulingResult *types.SchedulingResult) {
171171
debugLogger := log.FromContext(ctx).V(logutil.DEBUG)
172172

173173
for _, profileResult := range schedulingResult.ProfileResults { // schedulingResult guaranteed not to be nil

pkg/plugins/scorer/active_request_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ func TestActiveRequestScorer_PreRequest(t *testing.T) {
124124
}
125125

126126
// First request
127-
scorer.PreRequest(ctx, request, schedulingResult, 0)
127+
scorer.PreRequest(ctx, request, schedulingResult)
128128

129129
// Check cache and pod counts
130130
compositeKey := "default/pod-a.test-request-1"
@@ -151,7 +151,7 @@ func TestActiveRequestScorer_PreRequest(t *testing.T) {
151151
},
152152
}
153153

154-
scorer.PreRequest(ctx, request2, schedulingResult2, 0)
154+
scorer.PreRequest(ctx, request2, schedulingResult2)
155155

156156
// Check incremented count
157157
scorer.mutex.RLock()
@@ -192,7 +192,7 @@ func TestActiveRequestScorer_ResponseComplete(t *testing.T) {
192192
},
193193
}
194194

195-
scorer.PreRequest(ctx, request, schedulingResult, 0)
195+
scorer.PreRequest(ctx, request, schedulingResult)
196196

197197
// Verify initial state
198198
compositeKey := "default/pod-a.test-request-1"
@@ -248,7 +248,7 @@ func TestActiveRequestScorer_TTLExpiration(t *testing.T) {
248248
}
249249

250250
// Add request
251-
scorer.PreRequest(ctx, request, schedulingResult, 0)
251+
scorer.PreRequest(ctx, request, schedulingResult)
252252

253253
// Verify request is added
254254
scorer.mutex.RLock()

pkg/plugins/scorer/no_hit_lru.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ func (s *NoHitLRU) Score(ctx context.Context, cycleState *types.CycleState, requ
256256

257257
// PreRequest is called before a request is sent to the target pod.
258258
// For cold requests, it updates the LRU cache to track which pods have been used recently.
259-
func (s *NoHitLRU) PreRequest(ctx context.Context, request *types.LLMRequest, schedulingResult *types.SchedulingResult, _ int) {
259+
func (s *NoHitLRU) PreRequest(ctx context.Context, request *types.LLMRequest, schedulingResult *types.SchedulingResult) {
260260
logger := log.FromContext(ctx).V(logutil.DEBUG)
261261

262262
if schedulingResult == nil || len(schedulingResult.ProfileResults) == 0 {

pkg/plugins/scorer/no_hit_lru_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ func TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests(t *testing.T) {
337337
t.Run("initial cold request seeds cache", func(_ *testing.T) {
338338
coldReqA := &types.LLMRequest{RequestId: "cold-1"}
339339
scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqA, pods)
340-
scorer.PreRequest(ctx, coldReqA, requestToPod(podA), 0)
340+
scorer.PreRequest(ctx, coldReqA, requestToPod(podA))
341341
// After podA handles a cold request, other pods should score higher for new cold requests
342342
assertHighestScoredPod(podB, "after-podA-used")
343343
})
@@ -367,7 +367,7 @@ func TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests(t *testing.T) {
367367
t.Fatalf("expected neutral score for warm request, got %f", score)
368368
}
369369
}
370-
scorer.PreRequest(ctx, warmReq, requestToPod(podB), 0)
370+
scorer.PreRequest(ctx, warmReq, requestToPod(podB))
371371
postWarmReq := &types.LLMRequest{RequestId: "cold-after-warm"}
372372
postWarmScores := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), postWarmReq, pods)
373373
if postWarmScores[podB] <= postWarmScores[podA] {
@@ -379,7 +379,7 @@ func TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests(t *testing.T) {
379379
// Simulate podB handling a cold request
380380
coldReqB := &types.LLMRequest{RequestId: "cold-2"}
381381
scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqB, pods)
382-
scorer.PreRequest(ctx, coldReqB, requestToPod(podB), 0)
382+
scorer.PreRequest(ctx, coldReqB, requestToPod(podB))
383383
// Now podC should score highest since both podA and podB have been used
384384
assertHighestScoredPod(podC, "after-podB-used")
385385
})
@@ -388,7 +388,7 @@ func TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests(t *testing.T) {
388388
// Simulate podC handling a cold request
389389
coldReqC := &types.LLMRequest{RequestId: "cold-3"}
390390
scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqC, pods)
391-
scorer.PreRequest(ctx, coldReqC, requestToPod(podC), 0)
391+
scorer.PreRequest(ctx, coldReqC, requestToPod(podC))
392392
// Now podA should score highest again (LRU rotation)
393393
assertHighestScoredPod(podA, "after-podC-used")
394394
})

pkg/scheduling/pd/scheduler_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ func TestPDSchedule(t *testing.T) {
266266

267267
if test.wantRes2 != nil { // Checking the prefix match in the decode pod.
268268
// make sure prefix plugin stores the prefix hit in cache, so we can test it in the following schedule call
269-
prefixScorer.PreRequest(ctx, test.req, got, 0)
269+
prefixScorer.PreRequest(ctx, test.req, got)
270270
time.Sleep(time.Second)
271271

272272
got, err = scheduler.Schedule(ctx, test.req, test.input)

test/e2e/e2e_suite_test.go

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package e2e
22

33
import (
4-
"context"
54
"fmt"
65
"io"
76
"os/exec"
@@ -14,22 +13,19 @@ import (
1413
"github.com/onsi/gomega"
1514
"github.com/onsi/gomega/gexec"
1615
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
17-
k8sruntime "k8s.io/apimachinery/pkg/runtime"
1816
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
19-
"sigs.k8s.io/controller-runtime/pkg/client"
2017
"sigs.k8s.io/controller-runtime/pkg/client/config"
2118
k8slog "sigs.k8s.io/controller-runtime/pkg/log"
19+
20+
infextv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
2221
infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
2322
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
23+
testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
2424
)
2525

2626
const (
27-
// defaultExistsTimeout is the default timeout for a resource to exist in the api server.
28-
defaultExistsTimeout = 30 * time.Second
2927
// defaultReadyTimeout is the default timeout for a resource to report a ready state.
3028
defaultReadyTimeout = 3 * time.Minute
31-
// defaultModelReadyTimeout is the default timeout for the model server deployment to report a ready state.
32-
defaultModelReadyTimeout = 10 * time.Minute
3329
// defaultInterval is the default interval to check if a resource exists or ready conditions.
3430
defaultInterval = time.Millisecond * 250
3531
// xInferPoolManifest is the manifest for the inference pool CRD with 'inference.networking.x-k8s.io' group.
@@ -57,19 +53,16 @@ const (
5753
)
5854

5955
var (
60-
ctx = context.Background()
61-
k8sClient client.Client
62-
port string
63-
scheme = k8sruntime.NewScheme()
56+
port string
57+
58+
testConfig *testutils.TestConfig
6459

6560
eppTag = env.GetEnvString("EPP_TAG", "dev", ginkgo.GinkgoLogr)
6661
vllmSimTag = env.GetEnvString("VLLM_SIMULATOR_TAG", "dev", ginkgo.GinkgoLogr)
67-
routingSideCarTag = env.GetEnvString("ROUTING_SIDECAR_TAG", "v0.2.0", ginkgo.GinkgoLogr)
62+
routingSideCarTag = env.GetEnvString("ROUTING_SIDECAR_TAG", "dev", ginkgo.GinkgoLogr)
6863

69-
existsTimeout = env.GetEnvDuration("EXISTS_TIMEOUT", defaultExistsTimeout, ginkgo.GinkgoLogr)
70-
readyTimeout = env.GetEnvDuration("READY_TIMEOUT", defaultReadyTimeout, ginkgo.GinkgoLogr)
71-
modelReadyTimeout = env.GetEnvDuration("MODEL_READY_TIMEOUT", defaultModelReadyTimeout, ginkgo.GinkgoLogr)
72-
interval = defaultInterval
64+
readyTimeout = env.GetEnvDuration("READY_TIMEOUT", defaultReadyTimeout, ginkgo.GinkgoLogr)
65+
interval = defaultInterval
7366
)
7467

7568
func TestEndToEnd(t *testing.T) {
@@ -83,16 +76,17 @@ var _ = ginkgo.BeforeSuite(func() {
8376
port = "30080"
8477

8578
setupK8sCluster()
79+
testConfig = testutils.NewTestConfig(nsName)
8680
setupK8sClient()
8781
createCRDs()
8882
createEnvoy()
89-
applyYAMLFile(rbacManifest)
90-
applyYAMLFile(serviceAccountManifest)
91-
applyYAMLFile(servicesManifest)
83+
testutils.ApplyYAMLFile(testConfig, rbacManifest)
84+
testutils.ApplyYAMLFile(testConfig, serviceAccountManifest)
85+
testutils.ApplyYAMLFile(testConfig, servicesManifest)
9286

93-
infPoolYaml := readYaml(inferExtManifest)
87+
infPoolYaml := testutils.ReadYaml(inferExtManifest)
9488
infPoolYaml = substituteMany(infPoolYaml, map[string]string{"${POOL_NAME}": modelName + "-inference-pool"})
95-
createObjsFromYaml(infPoolYaml)
89+
testutils.CreateObjsFromYaml(testConfig, infPoolYaml)
9690
})
9791

9892
var _ = ginkgo.AfterSuite(func() {
@@ -147,32 +141,33 @@ func setupK8sClient() {
147141
k8sCfg := config.GetConfigOrDie()
148142
gomega.ExpectWithOffset(1, k8sCfg).NotTo(gomega.BeNil())
149143

150-
err := clientgoscheme.AddToScheme(scheme)
144+
err := clientgoscheme.AddToScheme(testConfig.Scheme)
151145
gomega.Expect(err).NotTo(gomega.HaveOccurred())
152146

153-
err = apiextv1.AddToScheme(scheme)
147+
err = infextv1.Install(testConfig.Scheme)
154148
gomega.Expect(err).NotTo(gomega.HaveOccurred())
155149

156-
err = infextv1a2.Install(scheme)
150+
err = apiextv1.AddToScheme(testConfig.Scheme)
157151
gomega.Expect(err).NotTo(gomega.HaveOccurred())
158152

159-
k8sClient, err = client.New(k8sCfg, client.Options{Scheme: scheme})
153+
err = infextv1a2.Install(testConfig.Scheme)
160154
gomega.Expect(err).NotTo(gomega.HaveOccurred())
161-
gomega.Expect(k8sClient).NotTo(gomega.BeNil())
155+
156+
testConfig.CreateCli()
162157

163158
k8slog.SetLogger(ginkgo.GinkgoLogr)
164159
}
165160

166161
// createCRDs creates the Inference Extension CRDs used for testing.
167162
func createCRDs() {
168163
crds := runKustomize(gieCrdsKustomize)
169-
createObjsFromYaml(crds)
164+
testutils.CreateObjsFromYaml(testConfig, crds)
170165
}
171166

172167
func createEnvoy() {
173-
manifests := readYaml(envoyManifest)
168+
manifests := testutils.ReadYaml(envoyManifest)
174169
ginkgo.By("Creating envoy proxy resources from manifest: " + envoyManifest)
175-
createObjsFromYaml(manifests)
170+
testutils.CreateObjsFromYaml(testConfig, manifests)
176171
}
177172

178173
const kindClusterConfig = `

0 commit comments

Comments
 (0)