@@ -46,6 +46,7 @@ type ExtProcServerRunner struct {
46
46
GrpcPort int
47
47
DestinationEndpointHintMetadataNamespace string
48
48
DestinationEndpointHintKey string
49
+ FairnessIDHeaderKey string
49
50
PoolNamespacedName types.NamespacedName
50
51
Datastore datastore.Datastore
51
52
SecureServing bool
@@ -63,24 +64,25 @@ type ExtProcServerRunner struct {
63
64
64
65
// Default values for CLI flags in main
// Each constant's trailing comment names the command-line flag it backs.
// DefaultPoolName is empty because the pool name is required and has no default.
65
66
const (
66
- DefaultGrpcPort = 9002 // default for --grpc-port
67
- DefaultGrpcHealthPort = 9003 // default for --grpc-health-port
68
- DefaultMetricsPort = 9090 // default for --metrics-port
69
- DefaultDestinationEndpointHintMetadataNamespace = "envoy.lb" // default for --destinationEndpointHintMetadataNamespace
70
- DefaultDestinationEndpointHintKey = "x-gateway-destination-endpoint" // default for --destination-endpoint-hint-key
71
- DefaultPoolName = "" // required but no default
72
- DefaultPoolNamespace = "default" // default for --pool-namespace
73
- DefaultRefreshMetricsInterval = 50 * time .Millisecond // default for --refresh-metrics-interval
74
- DefaultRefreshPrometheusMetricsInterval = 5 * time .Second // default for --refresh-prometheus-metrics-interval
75
- DefaultSecureServing = true // default for --secure-serving
76
- DefaultHealthChecking = false // default for --health-checking
77
- DefaultEnablePprof = true // default for --enable-pprof
78
- DefaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting" // default for --total-queued-requests-metric
79
- DefaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc" // default for --kv-cache-usage-percentage-metric
80
- DefaultLoraInfoMetric = "vllm:lora_requests_info" // default for --lora-info-metric
81
- DefaultCertPath = "" // default for --cert-path
82
- DefaultConfigFile = "" // default for --config-file
83
- DefaultConfigText = "" // default for --config-text
67
+ DefaultGrpcPort = 9002 // default for --grpc-port
68
+ DefaultGrpcHealthPort = 9003 // default for --grpc-health-port
69
+ DefaultMetricsPort = 9090 // default for --metrics-port
70
+ DefaultDestinationEndpointHintMetadataNamespace = "envoy.lb" // default for --destinationEndpointHintMetadataNamespace
71
+ DefaultDestinationEndpointHintKey = "x-gateway-destination-endpoint" // default for --destination-endpoint-hint-key
72
+ DefaultFairnessIDHeaderKey = "x-gateway-inference-fairness-id" // default for --fairness-id-header-key
73
+ DefaultPoolName = "" // required but no default
74
+ DefaultPoolNamespace = "default" // default for --pool-namespace
75
+ DefaultRefreshMetricsInterval = 50 * time .Millisecond // default for --refresh-metrics-interval
76
+ DefaultRefreshPrometheusMetricsInterval = 5 * time .Second // default for --refresh-prometheus-metrics-interval
77
+ DefaultSecureServing = true // default for --secure-serving
78
+ DefaultHealthChecking = false // default for --health-checking
79
+ DefaultEnablePprof = true // default for --enable-pprof
80
+ DefaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting" // default for --total-queued-requests-metric
81
+ DefaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc" // default for --kv-cache-usage-percentage-metric
82
+ DefaultLoraInfoMetric = "vllm:lora_requests_info" // default for --lora-info-metric
83
+ DefaultCertPath = "" // default for --cert-path
84
+ DefaultConfigFile = "" // default for --config-file
85
+ DefaultConfigText = "" // default for --config-text
84
86
DefaultMetricsStalenessThreshold = 2 * time .Second // NOTE(review): no CLI flag is named for this default — confirm whether it is configurable
85
87
)
86
88
@@ -91,6 +93,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
91
93
GrpcPort : DefaultGrpcPort ,
92
94
DestinationEndpointHintKey : DefaultDestinationEndpointHintKey ,
93
95
DestinationEndpointHintMetadataNamespace : DefaultDestinationEndpointHintMetadataNamespace ,
96
+ FairnessIDHeaderKey : DefaultFairnessIDHeaderKey ,
94
97
PoolNamespacedName : types.NamespacedName {Name : DefaultPoolName , Namespace : DefaultPoolNamespace },
95
98
SecureServing : DefaultSecureServing ,
96
99
HealthChecking : DefaultHealthChecking ,
@@ -159,6 +162,7 @@ func (r *ExtProcServerRunner) AsRunnable(logger logr.Logger) manager.Runnable {
159
162
extProcServer := handlers .NewStreamingServer (
160
163
r .DestinationEndpointHintMetadataNamespace ,
161
164
r .DestinationEndpointHintKey ,
165
+ r .FairnessIDHeaderKey ,
162
166
r .Datastore ,
163
167
r .Director ,
164
168
)
0 commit comments