Skip to content

Commit af8cc85

Browse files
committed
Address PR comments
Signed-off-by: Ernest Wong <[email protected]>
1 parent d98664c commit af8cc85

File tree

2 files changed

+68
-21
lines changed

2 files changed

+68
-21
lines changed

config/manifests/inferencepool-resources.yaml

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Note: If you change this file, please also change the file used for e2e tests!
2-
#
2+
#
33
# https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/test/testdata/inferencepool-e2e.yaml
44
apiVersion: inference.networking.x-k8s.io/v1alpha2
55
kind: InferencePool
@@ -27,6 +27,12 @@ spec:
2727
appProtocol: http2
2828
type: ClusterIP
2929
---
30+
apiVersion: v1
31+
kind: ServiceAccount
32+
metadata:
33+
name: vllm-llama3-8b-instruct-epp
34+
namespace: default
35+
---
3036
apiVersion: apps/v1
3137
kind: Deployment
3238
metadata:
@@ -44,6 +50,7 @@ spec:
4450
labels:
4551
app: vllm-llama3-8b-instruct-epp
4652
spec:
53+
serviceAccountName: vllm-llama3-8b-instruct-epp
4754
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
4855
terminationGracePeriodSeconds: 130
4956
containers:
@@ -81,20 +88,38 @@ spec:
8188
initialDelaySeconds: 5
8289
periodSeconds: 10
8390
---
84-
kind: ClusterRole
91+
kind: Role
8592
apiVersion: rbac.authorization.k8s.io/v1
8693
metadata:
8794
name: pod-read
95+
namespace: default
8896
rules:
8997
- apiGroups: ["inference.networking.x-k8s.io"]
90-
resources: ["inferencepools"]
91-
verbs: ["get", "watch", "list"]
92-
- apiGroups: ["inference.networking.x-k8s.io"]
93-
resources: ["inferencemodels"]
98+
resources: ["inferencepools", "inferencemodels"]
9499
verbs: ["get", "watch", "list"]
95100
- apiGroups: [""]
96101
resources: ["pods"]
97102
verbs: ["get", "watch", "list"]
103+
---
104+
kind: RoleBinding
105+
apiVersion: rbac.authorization.k8s.io/v1
106+
metadata:
107+
name: pod-read-binding
108+
namespace: default
109+
subjects:
110+
- kind: ServiceAccount
111+
name: vllm-llama3-8b-instruct-epp
112+
namespace: default
113+
roleRef:
114+
apiGroup: rbac.authorization.k8s.io
115+
kind: Role
116+
name: pod-read
117+
---
118+
kind: ClusterRole
119+
apiVersion: rbac.authorization.k8s.io/v1
120+
metadata:
121+
name: auth-reviewer
122+
rules:
98123
- apiGroups:
99124
- authentication.k8s.io
100125
resources:
@@ -107,16 +132,16 @@ rules:
107132
- subjectaccessreviews
108133
verbs:
109134
- create
110-
---
135+
---
111136
kind: ClusterRoleBinding
112137
apiVersion: rbac.authorization.k8s.io/v1
113138
metadata:
114-
name: pod-read-binding
139+
name: auth-reviewer-binding
115140
subjects:
116141
- kind: ServiceAccount
117-
name: default
142+
name: vllm-llama3-8b-instruct-epp
118143
namespace: default
119144
roleRef:
120145
apiGroup: rbac.authorization.k8s.io
121146
kind: ClusterRole
122-
name: pod-read
147+
name: auth-reviewer

test/testdata/inferencepool-e2e.yaml

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ spec:
2626
appProtocol: http2
2727
type: ClusterIP
2828
---
29+
apiVersion: v1
30+
kind: ServiceAccount
31+
metadata:
32+
name: vllm-llama3-8b-instruct-epp
33+
namespace: $E2E_NS
34+
---
2935
apiVersion: apps/v1
3036
kind: Deployment
3137
metadata:
@@ -43,6 +49,7 @@ spec:
4349
labels:
4450
app: vllm-llama3-8b-instruct-epp
4551
spec:
52+
serviceAccountName: vllm-llama3-8b-instruct-epp
4653
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
4754
terminationGracePeriodSeconds: 130
4855
containers:
@@ -62,9 +69,6 @@ spec:
6269
- "9002"
6370
- -grpcHealthPort
6471
- "9003"
65-
env:
66-
- name: USE_STREAMING
67-
value: "true"
6872
ports:
6973
- containerPort: 9002
7074
- containerPort: 9003
@@ -83,20 +87,38 @@ spec:
8387
initialDelaySeconds: 5
8488
periodSeconds: 10
8589
---
86-
kind: ClusterRole
90+
kind: Role
8791
apiVersion: rbac.authorization.k8s.io/v1
8892
metadata:
8993
name: pod-read
94+
namespace: $E2E_NS
9095
rules:
9196
- apiGroups: ["inference.networking.x-k8s.io"]
92-
resources: ["inferencepools"]
93-
verbs: ["get", "watch", "list"]
94-
- apiGroups: ["inference.networking.x-k8s.io"]
95-
resources: ["inferencemodels"]
97+
resources: ["inferencepools", "inferencemodels"]
9698
verbs: ["get", "watch", "list"]
9799
- apiGroups: [""]
98100
resources: ["pods"]
99101
verbs: ["get", "watch", "list"]
102+
---
103+
kind: RoleBinding
104+
apiVersion: rbac.authorization.k8s.io/v1
105+
metadata:
106+
name: pod-read-binding
107+
namespace: $E2E_NS
108+
subjects:
109+
- kind: ServiceAccount
110+
name: vllm-llama3-8b-instruct-epp
111+
namespace: $E2E_NS
112+
roleRef:
113+
apiGroup: rbac.authorization.k8s.io
114+
kind: Role
115+
name: pod-read
116+
---
117+
kind: ClusterRole
118+
apiVersion: rbac.authorization.k8s.io/v1
119+
metadata:
120+
name: auth-reviewer
121+
rules:
100122
- apiGroups:
101123
- authentication.k8s.io
102124
resources:
@@ -113,12 +135,12 @@ rules:
113135
kind: ClusterRoleBinding
114136
apiVersion: rbac.authorization.k8s.io/v1
115137
metadata:
116-
name: pod-read-binding
138+
name: auth-reviewer-binding
117139
subjects:
118140
- kind: ServiceAccount
119-
name: default
141+
name: vllm-llama3-8b-instruct-epp
120142
namespace: $E2E_NS
121143
roleRef:
122144
apiGroup: rbac.authorization.k8s.io
123145
kind: ClusterRole
124-
name: pod-read
146+
name: auth-reviewer

0 commit comments

Comments
 (0)