From ccaa5e5c71b01129308fd01e50bed21f8fda06d9 Mon Sep 17 00:00:00 2001
From: Ernest Wong <chwong719@gmail.com>
Date: Tue, 29 Jul 2025 12:35:03 -0700
Subject: [PATCH 1/2] fix: split EPP RBAC into cluster-scoped and
 namespace-scoped permissions

Signed-off-by: Ernest Wong <chwong719@gmail.com>
---
 .../charts/inferencepool/templates/rbac.yaml  | 38 +++++++++----
 config/manifests/inferencepool-resources.yaml | 54 +++++++++++++------
 test/e2e/epp/e2e_suite_test.go                | 19 ++++++-
 test/testdata/inferencepool-e2e.yaml          | 54 +++++++++++++------
 test/utils/utils.go                           |  4 +-
 5 files changed, 124 insertions(+), 45 deletions(-)

diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml
index 217fd24c8..0c432f4b9 100644
--- a/config/charts/inferencepool/templates/rbac.yaml
+++ b/config/charts/inferencepool/templates/rbac.yaml
@@ -5,15 +5,6 @@ metadata:
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
 rules:
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencemodels", "inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
 - apiGroups:
   - authentication.k8s.io
   resources:
@@ -40,6 +31,40 @@ roleRef:
   kind: ClusterRole
   name: {{ include "gateway-api-inference-extension.name" . }}
 ---
+# Read-only (get/watch/list) access, limited to the release namespace.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels", "inferencepools"]
+  verbs: ["get", "watch", "list"]
+# inferencepools is granted under both API groups, as the ClusterRole rules did.
+- apiGroups: ["inference.networking.k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+subjects:
+- kind: ServiceAccount
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ include "gateway-api-inference-extension.name" . }}
+---
 apiVersion: v1
 kind: ServiceAccount
 metadata:
diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
index fa49b25c7..58c5a8d34 100644
--- a/config/manifests/inferencepool-resources.yaml
+++ b/config/manifests/inferencepool-resources.yaml
@@ -29,6 +29,12 @@ spec:
       appProtocol: http2
   type: ClusterIP
 ---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: vllm-llama3-8b-instruct-epp
+  namespace: default
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -46,6 +52,7 @@ spec:
       labels:
         app: vllm-llama3-8b-instruct-epp
     spec:
+      serviceAccountName: vllm-llama3-8b-instruct-epp
       # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
       terminationGracePeriodSeconds: 130
       containers:
@@ -174,23 +181,38 @@ data:
         weight: 1
       - pluginRef: max-score-picker
 ---
-kind: ClusterRole
+kind: Role
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: pod-read
+  namespace: default
+rules:
+- apiGroups: [ "inference.networking.x-k8s.io" ]
+  resources: [ "inferencepools", "inferencemodels" ]
+  verbs: [ "get", "watch", "list" ]
+- apiGroups: [ "" ]
+  resources: [ "pods" ]
+  verbs: [ "get", "watch", "list" ]
+---
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read-binding
+  namespace: default
+subjects:
+- kind: ServiceAccount
+  name: vllm-llama3-8b-instruct-epp
+  namespace: default
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: pod-read
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: auth-reviewer
 rules:
-- apiGroups: ["inference.networking.k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencemodels"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
 - apiGroups:
   - authentication.k8s.io
   resources:
@@ -207,12 +229,12 @@ rules:
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
-  name: pod-read-binding
+  name: auth-reviewer-binding
 subjects:
 - kind: ServiceAccount
-  name: default
+  name: vllm-llama3-8b-instruct-epp
   namespace: default
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
-  name: pod-read
+  name: auth-reviewer
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
index e4ca60475..0e8ee12af 100644
--- a/test/e2e/epp/e2e_suite_test.go
+++ b/test/e2e/epp/e2e_suite_test.go
@@ -392,14 +392,29 @@ func createInferExt(k8sClient client.Client, filePath string) {
 	ginkgo.By("Creating inference extension resources from manifest: " + filePath)
 	createObjsFromYaml(k8sClient, outManifests)
 
+	// Wait for the serviceaccount to exist.
+	testutils.EventuallyExists(ctx, func() error {
+		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, &corev1.ServiceAccount{})
+	}, existsTimeout, interval)
+
+	// Wait for the role to exist.
+	testutils.EventuallyExists(ctx, func() error {
+		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read"}, &rbacv1.Role{})
+	}, existsTimeout, interval)
+
+	// Wait for the rolebinding to exist.
+	testutils.EventuallyExists(ctx, func() error {
+		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read-binding"}, &rbacv1.RoleBinding{})
+	}, existsTimeout, interval)
+
 	// Wait for the clusterrole to exist.
 	testutils.EventuallyExists(ctx, func() error {
-		return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read"}, &rbacv1.ClusterRole{})
+		return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer"}, &rbacv1.ClusterRole{})
 	}, existsTimeout, interval)
 
 	// Wait for the clusterrolebinding to exist.
 	testutils.EventuallyExists(ctx, func() error {
-		return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read-binding"}, &rbacv1.ClusterRoleBinding{})
+		return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer-binding"}, &rbacv1.ClusterRoleBinding{})
 	}, existsTimeout, interval)
 
 	// Wait for the deployment to exist.
diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml
index 3cd63f391..809ef3744 100644
--- a/test/testdata/inferencepool-e2e.yaml
+++ b/test/testdata/inferencepool-e2e.yaml
@@ -26,6 +26,12 @@ spec:
       appProtocol: http2
   type: ClusterIP
 ---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: vllm-llama3-8b-instruct-epp
+  namespace: $E2E_NS
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -43,6 +49,7 @@ spec:
       labels:
         app: vllm-llama3-8b-instruct-epp
     spec:
+      serviceAccountName: vllm-llama3-8b-instruct-epp
       # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
       terminationGracePeriodSeconds: 130
       containers:
@@ -171,23 +178,38 @@ data:
         weight: 1
       - pluginRef: max-score-picker
 ---
-kind: ClusterRole
+kind: Role
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: pod-read
+  namespace: $E2E_NS
+rules:
+- apiGroups: [ "inference.networking.x-k8s.io" ]
+  resources: [ "inferencepools", "inferencemodels" ]
+  verbs: [ "get", "watch", "list" ]
+- apiGroups: [ "" ]
+  resources: [ "pods" ]
+  verbs: [ "get", "watch", "list" ]
+---
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read-binding
+  namespace: $E2E_NS
+subjects:
+- kind: ServiceAccount
+  name: vllm-llama3-8b-instruct-epp
+  namespace: $E2E_NS
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: pod-read
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: auth-reviewer
 rules:
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencemodels"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
 - apiGroups:
   - authentication.k8s.io
   resources:
@@ -204,12 +226,12 @@ rules:
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
-  name: pod-read-binding
+  name: auth-reviewer-binding
 subjects:
 - kind: ServiceAccount
-  name: default
+  name: vllm-llama3-8b-instruct-epp
   namespace: $E2E_NS
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
-  name: pod-read
+  name: auth-reviewer
diff --git a/test/utils/utils.go b/test/utils/utils.go
index 71293f038..5b26c9a10 100644
--- a/test/utils/utils.go
+++ b/test/utils/utils.go
@@ -45,7 +45,7 @@ import (
 func DeleteClusterResources(ctx context.Context, cli client.Client) error {
 	binding := &rbacv1.ClusterRoleBinding{
 		ObjectMeta: metav1.ObjectMeta{
-			Name: "pod-read-binding",
+			Name: "auth-reviewer-binding",
 		},
 	}
 	err := cli.Delete(ctx, binding, client.PropagationPolicy(metav1.DeletePropagationForeground))
@@ -54,7 +54,7 @@ func DeleteClusterResources(ctx context.Context, cli client.Client) error {
 	}
 	role := &rbacv1.ClusterRole{
 		ObjectMeta: metav1.ObjectMeta{
-			Name: "pod-read",
+			Name: "auth-reviewer",
 		},
 	}
 	err = cli.Delete(ctx, role, client.PropagationPolicy(metav1.DeletePropagationForeground))

From 6524475e6d77739e2e520f191c54f67fa924faff Mon Sep 17 00:00:00 2001
From: Ernest Wong <chwong719@gmail.com>
Date: Tue, 29 Jul 2025 13:42:08 -0700
Subject: [PATCH 2/2] Add inference.networking.k8s.io API group to EPP Role

Signed-off-by: Ernest Wong <chwong719@gmail.com>
---
 config/manifests/inferencepool-resources.yaml | 5 ++++-
 test/testdata/inferencepool-e2e.yaml          | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
index 58c5a8d34..9c6d7c218 100644
--- a/config/manifests/inferencepool-resources.yaml
+++ b/config/manifests/inferencepool-resources.yaml
@@ -188,7 +188,10 @@ metadata:
   namespace: default
 rules:
 - apiGroups: [ "inference.networking.x-k8s.io" ]
-  resources: [ "inferencepools", "inferencemodels" ]
+  resources: [ "inferencemodels", "inferencepools" ]
+  verbs: [ "get", "watch", "list" ]
+- apiGroups: [ "inference.networking.k8s.io" ]
+  resources: [ "inferencepools" ]
   verbs: [ "get", "watch", "list" ]
 - apiGroups: [ "" ]
   resources: [ "pods" ]
diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml
index 809ef3744..dd60aa09f 100644
--- a/test/testdata/inferencepool-e2e.yaml
+++ b/test/testdata/inferencepool-e2e.yaml
@@ -185,7 +185,10 @@ metadata:
   namespace: $E2E_NS
 rules:
 - apiGroups: [ "inference.networking.x-k8s.io" ]
-  resources: [ "inferencepools", "inferencemodels" ]
+  resources: [ "inferencemodels", "inferencepools" ]
+  verbs: [ "get", "watch", "list" ]
+- apiGroups: [ "inference.networking.k8s.io" ]
+  resources: [ "inferencepools" ]
   verbs: [ "get", "watch", "list" ]
 - apiGroups: [ "" ]
   resources: [ "pods" ]