diff --git a/cmd/llm-d-routing-sidecar/main.go b/cmd/llm-d-routing-sidecar/main.go index 8d9f38c..651e873 100644 --- a/cmd/llm-d-routing-sidecar/main.go +++ b/cmd/llm-d-routing-sidecar/main.go @@ -60,7 +60,7 @@ func main() { if *connector == proxy.ConnectorNIXLV1 { logger.Info("Warning: nixl connector is deprecated and will be removed in a future release in favor of --connector=nixlv2") } - logger.Info("p/d connector validated", "connector", connector) + logger.Info("p/d connector validated", "connector", *connector) // Determine namespace and pool name for SSRF protection if *enableSSRFProtection { diff --git a/go.mod b/go.mod index 352e439..ab9d2c1 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,8 @@ require ( github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.37.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 + go.opentelemetry.io/otel v1.36.0 k8s.io/apimachinery v0.31.3 k8s.io/client-go v0.31.3 k8s.io/klog/v2 v2.130.1 @@ -17,7 +19,9 @@ require ( require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect @@ -29,19 +33,19 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/rogpeppe/go-internal v1.13.1 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/testify v1.10.0 // indirect github.com/x448/float16 v0.8.4 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel/metric v1.36.0 // indirect + go.opentelemetry.io/otel/trace v1.36.0 // indirect go.uber.org/automaxprocs v1.6.0 // indirect golang.org/x/net v0.38.0 // indirect golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sys v0.32.0 // indirect + golang.org/x/sys v0.33.0 // indirect golang.org/x/term v0.30.0 // indirect golang.org/x/text v0.23.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.31.0 // indirect - gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 1173fd5..2a6a466 100644 --- a/go.sum +++ b/go.sum @@ -4,10 +4,15 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emicklei/go-restful/v3 v3.12.0 h1:y2DdzBAURM29NFF94q6RaY4vjIH1rtwDapwQtU84iWk= github.com/emicklei/go-restful/v3 v3.12.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= @@ -42,11 +47,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= @@ -79,6 +81,16 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 h1:DheMAlT6POBP+gh8RUH19EOTnQIor5QE0uSRPtzCpSw= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0/go.mod h1:wZcGmeVO9nzP67aYSLDqXNWK87EZWhi7JWj1v7ZXf94= +go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= +go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= +go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= +go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= +go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= +go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -100,8 +112,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/internal/proxy/chat_completions.go b/internal/proxy/chat_completions.go index 725ed0b..bc96db9 100644 --- a/internal/proxy/chat_completions.go +++ b/internal/proxy/chat_completions.go @@ -18,6 +18,9 @@ package proxy import ( "net/http" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" ) var ( @@ -29,8 +32,15 @@ var ( ) func (s *Server) chatCompletionsHandler(w http.ResponseWriter, r *http.Request) { - prefillPodHostPort := r.Header.Get(requestHeaderPrefillHostPort) + tracer := otel.GetTracerProvider().Tracer("llm-d-routing-sidecar") + ctx, span := tracer.Start(r.Context(), "routing_proxy.request") + defer span.End() + + span.SetAttributes( + attribute.String("llm_d.proxy.connector", s.config.Connector), + ) + prefillPodHostPort := r.Header.Get(requestHeaderPrefillHostPort) if prefillPodHostPort == "" { // backward compatible behavior: to remove in next release prefillPodHostPort = r.Header.Get(requestHeaderPrefillURL) @@ -38,6 +48,8 @@ func (s *Server) chatCompletionsHandler(w http.ResponseWriter, r *http.Request) if prefillPodHostPort == "" { s.logger.V(4).Info("skip disaggregated prefill") + // Update the request context for downstream handlers + r = r.WithContext(ctx) s.decoderProxy.ServeHTTP(w, r) return } @@ -54,5 +66,7 @@ func (s *Server) chatCompletionsHandler(w http.ResponseWriter, r *http.Request) } s.logger.V(4).Info("SSRF protection: prefill target allowed", "target", prefillPodHostPort) + + r = r.WithContext(ctx) s.runConnectorProtocol(w, r, prefillPodHostPort) } diff --git a/internal/proxy/connector_nixlv2.go b/internal/proxy/connector_nixlv2.go index 21d1b69..dabc8aa 100644 --- a/internal/proxy/connector_nixlv2.go +++ b/internal/proxy/connector_nixlv2.go @@ -23,9 +23,20 @@ import ( "strings" "github.com/google/uuid" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" ) func (s *Server) runNIXLProtocolV2(w http.ResponseWriter, r *http.Request, prefillPodHostPort string) { + tracer := otel.GetTracerProvider().Tracer("llm-d-routing-sidecar") + ctx, span := tracer.Start(r.Context(), "routing_proxy.nixl_v2_protocol") + defer span.End() + + span.SetAttributes( + attribute.String("llm_d.proxy.connector", "nixlv2"), + attribute.String("llm_d.prefill.target_host", prefillPodHostPort), + ) + s.logger.V(4).Info("running NIXL protocol V2", "url", prefillPodHostPort) // Read request body @@ -57,10 +68,13 @@ func (s *Server) runNIXLProtocolV2(w http.ResponseWriter, r *http.Request, prefi uuidStr := uuid.String() // Prefill Stage + _, prefillSpan := tracer.Start(ctx, "routing_proxy.nixl_v2_prefill") + prefillSpan.SetAttributes( + attribute.String("llm_d.nixl.stage", "prefill"), + ) // 1. Prepare prefill request - ctx := r.Context() - preq := r.Clone(ctx) + preq := r.Clone(r.Context()) preq.Header.Add(requestHeaderRequestID, uuidStr) @@ -131,7 +145,7 @@ func (s *Server) runNIXLProtocolV2(w http.ResponseWriter, r *http.Request, prefi // Decode Stage // 1. Prepare decode request - dreq := r.Clone(ctx) + dreq := r.Clone(r.Context()) dreq.Header.Add(requestHeaderRequestID, uuidStr) @@ -159,7 +173,6 @@ func (s *Server) runNIXLProtocolV2(w http.ResponseWriter, r *http.Request, prefi dreq.ContentLength = int64(len(dbody)) // 2. Forward to local decoder. - s.logger.V(5).Info("sending request to decoder", "body", string(dbody)) s.decoderProxy.ServeHTTP(w, dreq) } diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go index 68e6409..5e59d75 100644 --- a/internal/proxy/proxy.go +++ b/internal/proxy/proxy.go @@ -31,6 +31,7 @@ import ( "github.com/go-logr/logr" lru "github.com/hashicorp/golang-lru/v2" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "k8s.io/klog/v2" ) @@ -165,8 +166,11 @@ func (s *Server) Start(ctx context.Context) error { // Configure handlers mux := s.createRoutes() + // Wrap the server with OpenTelemetry HTTP instrumentation + handler := otelhttp.NewHandler(mux, "routing-proxy-server") + server := &http.Server{ - Handler: mux, + Handler: handler, // No ReadTimeout/WriteTimeout for LLM inference - can take hours for large contexts IdleTimeout: 300 * time.Second, // 5 minutes for keep-alive connections ReadHeaderTimeout: 30 * time.Second, // Reasonable for headers only