Skip to content

Commit 49432c8

Browse files
committed
Address PR comments
Signed-off-by: Ernest Wong <[email protected]>
1 parent d98664c commit 49432c8

File tree

2 files changed

+68
-18
lines changed

2 files changed

+68
-18
lines changed

config/manifests/inferencepool-resources.yaml

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Note: If you change this file, please also change the file used for e2e tests!
2-
#
2+
#
33
# https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/test/testdata/inferencepool-e2e.yaml
44
apiVersion: inference.networking.x-k8s.io/v1alpha2
55
kind: InferencePool
@@ -27,6 +27,12 @@ spec:
2727
appProtocol: http2
2828
type: ClusterIP
2929
---
30+
apiVersion: v1
31+
kind: ServiceAccount
32+
metadata:
33+
name: vllm-llama3-8b-instruct-epp
34+
namespace: default
35+
---
3036
apiVersion: apps/v1
3137
kind: Deployment
3238
metadata:
@@ -44,6 +50,7 @@ spec:
4450
labels:
4551
app: vllm-llama3-8b-instruct-epp
4652
spec:
53+
serviceAccountName: vllm-llama3-8b-instruct-epp
4754
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
4855
terminationGracePeriodSeconds: 130
4956
containers:
@@ -81,20 +88,38 @@ spec:
8188
initialDelaySeconds: 5
8289
periodSeconds: 10
8390
---
84-
kind: ClusterRole
91+
kind: Role
8592
apiVersion: rbac.authorization.k8s.io/v1
8693
metadata:
8794
name: pod-read
95+
namespace: default
8896
rules:
8997
- apiGroups: ["inference.networking.x-k8s.io"]
90-
resources: ["inferencepools"]
91-
verbs: ["get", "watch", "list"]
92-
- apiGroups: ["inference.networking.x-k8s.io"]
93-
resources: ["inferencemodels"]
98+
resources: ["inferencepools", "inferencemodels"]
9499
verbs: ["get", "watch", "list"]
95100
- apiGroups: [""]
96101
resources: ["pods"]
97102
verbs: ["get", "watch", "list"]
103+
---
104+
kind: RoleBinding
105+
apiVersion: rbac.authorization.k8s.io/v1
106+
metadata:
107+
name: pod-read-binding
108+
namespace: default
109+
subjects:
110+
- kind: ServiceAccount
111+
name: vllm-llama3-8b-instruct-epp
112+
namespace: default
113+
roleRef:
114+
apiGroup: rbac.authorization.k8s.io
115+
kind: Role
116+
name: pod-read
117+
---
118+
kind: ClusterRole
119+
apiVersion: rbac.authorization.k8s.io/v1
120+
metadata:
121+
name: auth-reviewer
122+
rules:
98123
- apiGroups:
99124
- authentication.k8s.io
100125
resources:
@@ -107,16 +132,16 @@ rules:
107132
- subjectaccessreviews
108133
verbs:
109134
- create
110-
---
135+
---
111136
kind: ClusterRoleBinding
112137
apiVersion: rbac.authorization.k8s.io/v1
113138
metadata:
114-
name: pod-read-binding
139+
name: auth-reviewer-binding
115140
subjects:
116141
- kind: ServiceAccount
117-
name: default
142+
name: vllm-llama3-8b-instruct-epp
118143
namespace: default
119144
roleRef:
120145
apiGroup: rbac.authorization.k8s.io
121146
kind: ClusterRole
122-
name: pod-read
147+
name: auth-reviewer

test/testdata/inferencepool-e2e.yaml

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ spec:
2626
appProtocol: http2
2727
type: ClusterIP
2828
---
29+
apiVersion: v1
30+
kind: ServiceAccount
31+
metadata:
32+
name: vllm-llama3-8b-instruct-epp
33+
namespace: $E2E_NS
34+
---
2935
apiVersion: apps/v1
3036
kind: Deployment
3137
metadata:
@@ -43,6 +49,7 @@ spec:
4349
labels:
4450
app: vllm-llama3-8b-instruct-epp
4551
spec:
52+
serviceAccountName: vllm-llama3-8b-instruct-epp
4653
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
4754
terminationGracePeriodSeconds: 130
4855
containers:
@@ -83,20 +90,38 @@ spec:
8390
initialDelaySeconds: 5
8491
periodSeconds: 10
8592
---
86-
kind: ClusterRole
93+
kind: Role
8794
apiVersion: rbac.authorization.k8s.io/v1
8895
metadata:
8996
name: pod-read
97+
namespace: $E2E_NS
9098
rules:
9199
- apiGroups: ["inference.networking.x-k8s.io"]
92-
resources: ["inferencepools"]
93-
verbs: ["get", "watch", "list"]
94-
- apiGroups: ["inference.networking.x-k8s.io"]
95-
resources: ["inferencemodels"]
100+
resources: ["inferencepools", "inferencemodels"]
96101
verbs: ["get", "watch", "list"]
97102
- apiGroups: [""]
98103
resources: ["pods"]
99104
verbs: ["get", "watch", "list"]
105+
---
106+
kind: RoleBinding
107+
apiVersion: rbac.authorization.k8s.io/v1
108+
metadata:
109+
name: pod-read-binding
110+
namespace: $E2E_NS
111+
subjects:
112+
- kind: ServiceAccount
113+
name: vllm-llama3-8b-instruct-epp
114+
namespace: $E2E_NS
115+
roleRef:
116+
apiGroup: rbac.authorization.k8s.io
117+
kind: Role
118+
name: pod-read
119+
---
120+
kind: ClusterRole
121+
apiVersion: rbac.authorization.k8s.io/v1
122+
metadata:
123+
name: auth-reviewer
124+
rules:
100125
- apiGroups:
101126
- authentication.k8s.io
102127
resources:
@@ -113,12 +138,12 @@ rules:
113138
kind: ClusterRoleBinding
114139
apiVersion: rbac.authorization.k8s.io/v1
115140
metadata:
116-
name: pod-read-binding
141+
name: auth-reviewer-binding
117142
subjects:
118143
- kind: ServiceAccount
119-
name: default
144+
name: vllm-llama3-8b-instruct-epp
120145
namespace: $E2E_NS
121146
roleRef:
122147
apiGroup: rbac.authorization.k8s.io
123148
kind: ClusterRole
124-
name: pod-read
149+
name: auth-reviewer

0 commit comments

Comments
 (0)