Add optional priorityClassName support to the Helm chart.

The pod templates of the cache server, the router and each serving-engine
modelSpec emit `priorityClassName` only when the corresponding value is
non-empty; the value is always rendered quoted.

NOTE(review): this patch was restored from a whitespace-collapsed copy. The
leading indentation of context/added lines and the position of blank lines
are reconstructed from the YAML/JSON nesting -- confirm against the target
files before applying.
NOTE(review): no values.yaml default or schema entry for
cacheserverSpec.priorityClassName is visible in this patch -- confirm whether
one should be added alongside the routerSpec default.

diff --git a/helm/templates/deployment-cache-server.yaml b/helm/templates/deployment-cache-server.yaml
index cac0f49fb..6ed2ecf27 100644
--- a/helm/templates/deployment-cache-server.yaml
+++ b/helm/templates/deployment-cache-server.yaml
@@ -16,6 +16,9 @@ spec:
       labels:
       {{- include "chart.cacheserverLabels" . | nindent 8 }}
     spec:
+      {{- if .Values.cacheserverSpec.priorityClassName }}
+      priorityClassName: {{ .Values.cacheserverSpec.priorityClassName | quote }}
+      {{- end }}
       {{- if .Values.cacheserverSpec.nodeSelectorTerms}}
       affinity:
         nodeAffinity:
diff --git a/helm/templates/deployment-router.yaml b/helm/templates/deployment-router.yaml
index 64711b37e..4e27e5c0c 100644
--- a/helm/templates/deployment-router.yaml
+++ b/helm/templates/deployment-router.yaml
@@ -18,6 +18,9 @@ spec:
         {{- include "chart.routerLabels" . | nindent 8 }}
     spec:
       serviceAccountName: {{ .Release.Name }}-router-service-account
+      {{- if .Values.routerSpec.priorityClassName }}
+      priorityClassName: {{ .Values.routerSpec.priorityClassName | quote }}
+      {{- end }}
       {{- if .Values.routerSpec.nodeSelectorTerms }}
       affinity:
         nodeAffinity:
diff --git a/helm/templates/deployment-vllm-multi.yaml b/helm/templates/deployment-vllm-multi.yaml
index 1a1119ed3..ccbfa943b 100644
--- a/helm/templates/deployment-vllm-multi.yaml
+++ b/helm/templates/deployment-vllm-multi.yaml
@@ -43,6 +43,9 @@ spec:
         helm-release-name: {{ .Release.Name }}
       {{- include "chart.engineLabels" . | nindent 8 }}
     spec:
+      {{- if $modelSpec.priorityClassName }}
+      priorityClassName: {{ $modelSpec.priorityClassName | quote }}
+      {{- end }}
       {{- if hasKey $modelSpec "initContainer" }}
       {{- $container := $modelSpec.initContainer }}
       initContainers:
diff --git a/helm/values.schema.json b/helm/values.schema.json
index 14393dd77..a8ff97454 100644
--- a/helm/values.schema.json
+++ b/helm/values.schema.json
@@ -88,6 +88,9 @@
               "requestGPUType": {
                 "type": "string"
               },
+              "priorityClassName": {
+                "type": "string"
+              },
               "pvcStorage": {
                 "type": "string"
               },
diff --git a/helm/values.yaml b/helm/values.yaml
index b4862742b..4578a8093 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -18,6 +18,7 @@ servingEngineSpec:
   # Each entry in the modelSpec array should contain the following fields:
   # - annotations: (Optional, map) The annotations to add to the deployment, e.g., {model: "opt125m"}
   # - serviceAccountName: (Optional, string) The name of the service account to use for the deployment, e.g., "vllm-service-account"
+  # - priorityClassName: (Optional, string) The name of the priority class for the deployment, e.g., "high-priority"
   # - podAnnotations: (Optional, map) The annotations to add to the pod, e.g., {model: "opt125m"}
   # - name: (string) The name of the model, e.g., "example-model"
   # - repository: (string) The repository of the model, e.g., "vllm/vllm-openai"
@@ -267,6 +268,9 @@ routerSpec:
   # -- Number of replicas
   replicaCount: 1
 
+  # -- Priority Class
+  priorityClassName: ""
+
   # -- Container port
   containerPort: 8000
 