From ccaa5e5c71b01129308fd01e50bed21f8fda06d9 Mon Sep 17 00:00:00 2001 From: Ernest Wong Date: Tue, 29 Jul 2025 12:35:03 -0700 Subject: [PATCH 1/2] fix: split EPP RBAC into cluster and namespaced scoped permission Signed-off-by: Ernest Wong --- .../charts/inferencepool/templates/rbac.yaml | 38 +++++++++---- config/manifests/inferencepool-resources.yaml | 54 +++++++++++++------ test/e2e/epp/e2e_suite_test.go | 19 ++++++- test/testdata/inferencepool-e2e.yaml | 54 +++++++++++++------ test/utils/utils.go | 4 +- 5 files changed, 124 insertions(+), 45 deletions(-) diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml index 217fd24c8..0c432f4b9 100644 --- a/config/charts/inferencepool/templates/rbac.yaml +++ b/config/charts/inferencepool/templates/rbac.yaml @@ -5,15 +5,6 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} rules: -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencemodels", "inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: ["inference.networking.k8s.io"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "watch", "list"] - apiGroups: - authentication.k8s.io resources: @@ -40,6 +31,35 @@ roleRef: kind: ClusterRole name: {{ include "gateway-api-inference-extension.name" . }} --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +rules: +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferencemodels", "inferencepools"] + verbs: ["get", "watch", "list"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} +subjects: +- kind: ServiceAccount + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "gateway-api-inference-extension.name" . }} +--- apiVersion: v1 kind: ServiceAccount metadata: diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml index fa49b25c7..58c5a8d34 100644 --- a/config/manifests/inferencepool-resources.yaml +++ b/config/manifests/inferencepool-resources.yaml @@ -29,6 +29,12 @@ spec: appProtocol: http2 type: ClusterIP --- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vllm-llama3-8b-instruct-epp + namespace: default +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -46,6 +52,7 @@ spec: labels: app: vllm-llama3-8b-instruct-epp spec: + serviceAccountName: vllm-llama3-8b-instruct-epp # Conservatively, this timeout should mirror the longest grace period of the pods within the pool terminationGracePeriodSeconds: 130 containers: @@ -174,23 +181,38 @@ data: weight: 1 - pluginRef: max-score-picker --- -kind: ClusterRole +kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: name: pod-read + namespace: default +rules: +- apiGroups: [ "inference.networking.x-k8s.io" ] + resources: [ "inferencepools", "inferencemodels" ] + verbs: [ "get", "watch", "list" ] +- apiGroups: [ "" ] + resources: [ "pods" ] + verbs: [ "get", "watch", "list" ] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pod-read-binding + namespace: default +subjects: +- kind: ServiceAccount + name: vllm-llama3-8b-instruct-epp + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: pod-read +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-reviewer rules: -- apiGroups: ["inference.networking.k8s.io"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencemodels"] - verbs: ["get", "watch", "list"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "watch", "list"] - apiGroups: - authentication.k8s.io resources: @@ -207,12 +229,12 @@ rules: kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: pod-read-binding + name: auth-reviewer-binding subjects: - kind: ServiceAccount - name: default + name: vllm-llama3-8b-instruct-epp namespace: default roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: pod-read + name: auth-reviewer diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go index e4ca60475..0e8ee12af 100644 --- a/test/e2e/epp/e2e_suite_test.go +++ b/test/e2e/epp/e2e_suite_test.go @@ -392,14 +392,29 @@ func createInferExt(k8sClient client.Client, filePath string) { ginkgo.By("Creating inference extension resources from manifest: " + filePath) createObjsFromYaml(k8sClient, outManifests) + // Wait for the serviceaccount to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, &corev1.ServiceAccount{}) + }, existsTimeout, interval) + + // Wait for the role to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read"}, &rbacv1.Role{}) + }, existsTimeout, interval) + + // Wait for the rolebinding to exist. + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read-binding"}, &rbacv1.RoleBinding{}) + }, existsTimeout, interval) + // Wait for the clusterrole to exist. testutils.EventuallyExists(ctx, func() error { - return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read"}, &rbacv1.ClusterRole{}) + return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer"}, &rbacv1.ClusterRole{}) }, existsTimeout, interval) // Wait for the clusterrolebinding to exist. testutils.EventuallyExists(ctx, func() error { - return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read-binding"}, &rbacv1.ClusterRoleBinding{}) + return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer-binding"}, &rbacv1.ClusterRoleBinding{}) }, existsTimeout, interval) // Wait for the deployment to exist. diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml index 3cd63f391..809ef3744 100644 --- a/test/testdata/inferencepool-e2e.yaml +++ b/test/testdata/inferencepool-e2e.yaml @@ -26,6 +26,12 @@ spec: appProtocol: http2 type: ClusterIP --- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vllm-llama3-8b-instruct-epp + namespace: $E2E_NS +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -43,6 +49,7 @@ spec: labels: app: vllm-llama3-8b-instruct-epp spec: + serviceAccountName: vllm-llama3-8b-instruct-epp # Conservatively, this timeout should mirror the longest grace period of the pods within the pool terminationGracePeriodSeconds: 130 containers: @@ -171,23 +178,38 @@ data: weight: 1 - pluginRef: max-score-picker --- -kind: ClusterRole +kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: name: pod-read + namespace: $E2E_NS +rules: +- apiGroups: [ "inference.networking.x-k8s.io" ] + resources: [ "inferencepools", "inferencemodels" ] + verbs: [ "get", "watch", "list" ] +- apiGroups: [ "" ] + resources: [ "pods" ] + verbs: [ "get", "watch", "list" ] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pod-read-binding + namespace: $E2E_NS +subjects: +- kind: ServiceAccount + name: vllm-llama3-8b-instruct-epp + namespace: $E2E_NS +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: pod-read +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-reviewer rules: -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencemodels"] - verbs: ["get", "watch", "list"] -- apiGroups: ["inference.networking.k8s.io"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "watch", "list"] - apiGroups: - authentication.k8s.io resources: @@ -204,12 +226,12 @@ rules: kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: pod-read-binding + name: auth-reviewer-binding subjects: - kind: ServiceAccount - name: default + name: vllm-llama3-8b-instruct-epp namespace: $E2E_NS roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: pod-read + name: auth-reviewer diff --git a/test/utils/utils.go b/test/utils/utils.go index 71293f038..5b26c9a10 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -45,7 +45,7 @@ import ( func DeleteClusterResources(ctx context.Context, cli client.Client) error { binding := &rbacv1.ClusterRoleBinding{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod-read-binding", + Name: "auth-reviewer-binding", }, } err := cli.Delete(ctx, binding, client.PropagationPolicy(metav1.DeletePropagationForeground)) @@ -54,7 +54,7 @@ func DeleteClusterResources(ctx context.Context, cli client.Client) error { } role := &rbacv1.ClusterRole{ ObjectMeta: metav1.ObjectMeta{ - Name: "pod-read", + Name: "auth-reviewer", }, } err = cli.Delete(ctx, role, client.PropagationPolicy(metav1.DeletePropagationForeground)) From 6524475e6d77739e2e520f191c54f67fa924faff Mon Sep 17 00:00:00 2001 From: Ernest Wong Date: Tue, 29 Jul 2025 13:42:08 -0700 Subject: [PATCH 2/2] New API group Signed-off-by: Ernest Wong --- config/manifests/inferencepool-resources.yaml | 5 ++++- test/testdata/inferencepool-e2e.yaml | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml index 58c5a8d34..9c6d7c218 100644 --- a/config/manifests/inferencepool-resources.yaml +++ b/config/manifests/inferencepool-resources.yaml @@ -188,7 +188,10 @@ metadata: namespace: default rules: - apiGroups: [ "inference.networking.x-k8s.io" ] - resources: [ "inferencepools", "inferencemodels" ] + resources: [ "inferencemodels", "inferencepools" ] + verbs: [ "get", "watch", "list" ] +- apiGroups: [ "inference.networking.k8s.io" ] + resources: [ "inferencepools" ] verbs: [ "get", "watch", "list" ] - apiGroups: [ "" ] resources: [ "pods" ] diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml index 809ef3744..dd60aa09f 100644 --- a/test/testdata/inferencepool-e2e.yaml +++ b/test/testdata/inferencepool-e2e.yaml @@ -185,7 +185,10 @@ metadata: namespace: $E2E_NS rules: - apiGroups: [ "inference.networking.x-k8s.io" ] - resources: [ "inferencepools", "inferencemodels" ] + resources: [ "inferencemodels", "inferencepools" ] + verbs: [ "get", "watch", "list" ] +- apiGroups: [ "inference.networking.k8s.io" ] + resources: [ "inferencepools" ] verbs: [ "get", "watch", "list" ] - apiGroups: [ "" ] resources: [ "pods" ]