Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions pkg/agent/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,13 @@ func (a *Client) Serve() {
if status.Code(err) == codes.Canceled {
klog.V(2).InfoS("stream canceled", "serverID", a.serverID, "agentID", a.agentID)
} else {
klog.ErrorS(err, "could not read stream", "serverID", a.serverID, "agentID", a.agentID)
select {
case <-a.stopCh:
klog.V(5).InfoS("could not read stream because agent client is shutting down", "serverID", a.serverID, "agentID", a.agentID, "err", err)
default:
// If stopCh is not closed, this is a legitimate, unexpected error.
klog.ErrorS(err, "could not read stream", "serverID", a.serverID, "agentID", a.agentID)
}
}
return
}
Expand Down Expand Up @@ -407,7 +413,13 @@ func (a *Client) Serve() {
closePkt.GetCloseResponse().ConnectID = connID
}
if err := a.Send(closePkt); err != nil {
klog.ErrorS(err, "close response failure", "")
if err == io.EOF {
klog.V(4).InfoS("received EOF; connection already closed", "connectionID", connID, "dialID", dialReq.Random, "err", err)
} else if _, ok := a.connManager.Get(connID); !ok {
klog.V(5).InfoS("connection already closed", "connectionID", connID, "dialID", dialReq.Random, "err", err)
} else {
klog.ErrorS(err, "close response failure", "connectionID", connID, "dialID", dialReq.Random)
}
}
close(dataCh)
a.connManager.Delete(connID)
Expand Down
8 changes: 4 additions & 4 deletions pkg/server/backend_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,10 @@ func containIDType(idTypes []header.IdentifierType, idType header.IdentifierType
// addBackend adds a backend.
func (s *DefaultBackendStorage) addBackend(identifier string, idType header.IdentifierType, backend *Backend) {
if !containIDType(s.idTypes, idType) {
klog.V(4).InfoS("fail to add backend", "backend", identifier, "error", &ErrWrongIDType{idType, s.idTypes})
klog.V(3).InfoS("fail to add backend", "backend", identifier, "error", &ErrWrongIDType{idType, s.idTypes})
return
}
klog.V(5).InfoS("Register backend for agent", "agentID", identifier)
klog.V(2).InfoS("Register backend for agent", "agentID", identifier)
s.mu.Lock()
defer s.mu.Unlock()
_, ok := s.backends[identifier]
Expand All @@ -327,7 +327,7 @@ func (s *DefaultBackendStorage) removeBackend(identifier string, idType header.I
klog.ErrorS(&ErrWrongIDType{idType, s.idTypes}, "fail to remove backend")
return
}
klog.V(5).InfoS("Remove connection for agent", "agentID", identifier)
klog.V(2).InfoS("Remove connection for agent", "agentID", identifier)
s.mu.Lock()
defer s.mu.Unlock()
backends, ok := s.backends[identifier]
Expand Down Expand Up @@ -400,7 +400,7 @@ func (s *DefaultBackendStorage) GetRandomBackend() (*Backend, error) {
return nil, &ErrNotFound{}
}
agentID := s.agentIDs[s.random.Intn(len(s.agentIDs))]
klog.V(5).InfoS("Pick agent as backend", "agentID", agentID)
klog.V(3).InfoS("Pick agent as backend", "agentID", agentID)
// always return the first connection to an agent, because the agent
// will close later connections if there are multiple.
return s.backends[agentID][0], nil
Expand Down
67 changes: 62 additions & 5 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,53 @@ const (
destHostKey key = iota
)

// mapDialErrorToHTTPStatus maps common TCP/network error strings to appropriate HTTP status codes
func mapDialErrorToHTTPStatus(errStr string) int {
// Convert to lowercase for case-insensitive matching
errLower := strings.ToLower(errStr)

// Check each error pattern and return appropriate status code
switch {
// Timeouts - backend didn't respond in time -> 504 Gateway Timeout
case strings.Contains(errLower, "i/o timeout"),
strings.Contains(errLower, "deadline exceeded"),
strings.Contains(errLower, "context deadline exceeded"),
strings.Contains(errLower, "timeout"):
return 504

// Resource exhaustion errors -> 503 Service Unavailable
case strings.Contains(errLower, "too many open files"),
strings.Contains(errLower, "socket: too many open files"):
return 503

// Connection errors -> 502 Bad Gateway
case strings.Contains(errLower, "connection refused"),
strings.Contains(errLower, "connection reset by peer"),
strings.Contains(errLower, "broken pipe"),
strings.Contains(errLower, "network is unreachable"),
strings.Contains(errLower, "no route to host"),
strings.Contains(errLower, "host is unreachable"),
strings.Contains(errLower, "network is down"):
return 502

// DNS resolution failures -> 502 Bad Gateway
case strings.Contains(errLower, "no such host"),
strings.Contains(errLower, "name resolution"),
strings.Contains(errLower, "lookup") && strings.Contains(errLower, "no such host"):
return 502

// TLS/SSL errors -> 502 Bad Gateway
case strings.Contains(errLower, "tls"),
strings.Contains(errLower, "ssl"),
strings.Contains(errLower, "certificate"):
return 502

// Default to 502 Bad Gateway for unknown proxy errors
default:
return 502
}
}

func (c *ProxyClientConnection) send(pkt *client.Packet) error {
defer func(start time.Time) { metrics.Metrics.ObserveFrontendWriteLatency(time.Since(start)) }(time.Now())
if c.Mode == ModeGRPC {
Expand All @@ -121,11 +168,21 @@ func (c *ProxyClientConnection) send(pkt *client.Packet) error {
_, err := c.HTTP.Write(pkt.GetData().Data)
return err
} else if pkt.Type == client.PacketType_DIAL_RSP {
if pkt.GetDialResponse().Error != "" {
body := bytes.NewBufferString(pkt.GetDialResponse().Error)
dialErr := pkt.GetDialResponse().Error
if dialErr != "" {
// // Map the error to appropriate HTTP status code
statusCode := mapDialErrorToHTTPStatus(dialErr)
statusText := http.StatusText(statusCode)
body := bytes.NewBufferString(dialErr)
t := http.Response{
StatusCode: 503,
StatusCode: statusCode,
Status: fmt.Sprintf("%d %s", statusCode, statusText),
Body: io.NopCloser(body),
Header: http.Header{
"Content-Type": []string{"text/plain; charset=utf-8"},
},
Proto: "HTTP/1.1",
ProtoMinor: 1,
}

t.Write(c.HTTP)
Expand Down Expand Up @@ -718,7 +775,7 @@ func (s *ProxyServer) Connect(stream agent.AgentService_ConnectServer) error {
}
agentID := backend.GetAgentID()

klog.V(5).InfoS("Connect request from agent", "agentID", agentID, "serverID", s.serverID)
klog.V(2).InfoS("Connect request from agent", "agentID", agentID, "serverID", s.serverID)
labels := runpprof.Labels(
"serverCount", strconv.Itoa(s.serverCount),
"agentID", agentID,
Expand Down Expand Up @@ -945,7 +1002,7 @@ func (s *ProxyServer) serveRecvBackend(backend *Backend, agentID string, recvCh
klog.V(5).InfoS("Ignoring unrecognized packet from backend", "packet", pkt, "agentID", agentID)
}
}
klog.V(5).InfoS("Close backend of agent", "agentID", agentID)
klog.V(3).InfoS("Close backend of agent", "agentID", agentID)
}

func (s *ProxyServer) sendBackendClose(backend *Backend, connectID int64, random int64, reason string) {
Expand Down
43 changes: 26 additions & 17 deletions pkg/server/tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
defer metrics.Metrics.HTTPConnectionDec()

klog.V(2).InfoS("Received request for host", "method", r.Method, "host", r.Host, "userAgent", r.UserAgent())
if r.TLS != nil {
if r.TLS != nil && len(r.TLS.PeerCertificates) > 0 {
klog.V(2).InfoS("TLS", "commonName", r.TLS.PeerCertificates[0].Subject.CommonName)
}
if r.Method != http.MethodConnect {
Expand All @@ -60,14 +60,6 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
return
}

// Send the HTTP 200 OK status after a successful hijack
_, err = conn.Write([]byte("HTTP/1.1 200 Connection Established\r\n\r\n"))
if err != nil {
klog.ErrorS(err, "failed to send 200 connection established")
conn.Close()
return
}

var closeOnce sync.Once
defer closeOnce.Do(func() { conn.Close() })

Expand Down Expand Up @@ -104,15 +96,23 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
connected: connected,
start: time.Now(),
backend: backend,
dialID: random,
agentID: backend.GetAgentID(),
}
t.Server.PendingDial.Add(random, connection)
if err := backend.Send(dialRequest); err != nil {
klog.ErrorS(err, "failed to tunnel dial request")
klog.ErrorS(err, "failed to tunnel dial request", "host", r.Host, "dialID", connection.dialID, "agentID", connection.agentID)
// Send proper HTTP error response
conn.Write([]byte(fmt.Sprintf("HTTP/1.1 502 Bad Gateway\r\nContent-Type: text/plain; charset=utf-8\r\n\r\nFailed to tunnel dial request: %v\r\n", err)))
conn.Close()
return
}
ctxt := backend.Context()
if ctxt.Err() != nil {
klog.ErrorS(err, "context reports failure")
klog.ErrorS(ctxt.Err(), "context reports failure")
conn.Write([]byte(fmt.Sprintf("HTTP/1.1 502 Bad Gateway\r\nContent-Type: text/plain; charset=utf-8\r\n\r\nBackend context error: %v\r\n", ctxt.Err())))
conn.Close()
return
}

select {
Expand All @@ -123,6 +123,15 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {

select {
case <-connection.connected: // Waiting for response before we begin full communication.
// Now that connection is established, send 200 OK to switch to tunnel mode
_, err = conn.Write([]byte("HTTP/1.1 200 Connection Established\r\n\r\n"))
if err != nil {
klog.ErrorS(err, "failed to send 200 connection established", "host", r.Host, "agentID", connection.agentID)
conn.Close()
return
}
klog.V(3).InfoS("Connection established, sent 200 OK", "host", r.Host, "agentID", connection.agentID, "connectionID", connection.connectID)

case <-closed: // Connection was closed before being established
}

Expand All @@ -142,22 +151,22 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
conn.Close()
}()

klog.V(3).InfoS("Starting proxy to host", "host", r.Host)
pkt := make([]byte, 1<<15) // Match GRPC Window size

connID := connection.connectID
agentID := connection.agentID
klog.V(3).InfoS("Starting proxy to host", "host", r.Host, "agentID", agentID, "connectionID", connID)

pkt := make([]byte, 1<<15) // Match GRPC Window size
var acc int

for {
n, err := bufrw.Read(pkt[:])
acc += n
if err == io.EOF {
klog.V(1).InfoS("EOF from host", "host", r.Host)
klog.V(1).InfoS("EOF from host", "host", r.Host, "agentID", agentID, "connectionID", connID)
break
}
if err != nil {
klog.ErrorS(err, "Received failure on connection")
klog.ErrorS(err, "Received failure on connection", "host", r.Host, "agentID", agentID, "connectionID", connID)
break
}

Expand All @@ -172,7 +181,7 @@ func (t *Tunnel) ServeHTTP(w http.ResponseWriter, r *http.Request) {
}
err = backend.Send(packet)
if err != nil {
klog.ErrorS(err, "error sending packet")
klog.ErrorS(err, "error sending packet", "host", r.Host, "agentID", agentID, "connectionID", connID)
break
}
klog.V(5).InfoS("Forwarding data on tunnel to agent",
Expand Down
62 changes: 11 additions & 51 deletions tests/proxy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ func TestFailedDNSLookupProxy_HTTPCONN(t *testing.T) {
t.Error(err)
}

urlString := "http://thissssssxxxxx.com:80"
urlString := "http://thisdefinitelydoesnotexist.com:80"
serverURL, _ := url.Parse(urlString)

// Send HTTP-Connect request
Expand All @@ -705,36 +705,12 @@ func TestFailedDNSLookupProxy_HTTPCONN(t *testing.T) {
t.Errorf("reading HTTP response from CONNECT: %v", err)
}

if res.StatusCode != 200 {
t.Errorf("expect 200; got %d", res.StatusCode)
}
if br.Buffered() > 0 {
t.Error("unexpected extra buffer")
}
dialer := func(_, _ string) (net.Conn, error) {
return conn, nil
}

c := &http.Client{
Transport: &http.Transport{
Dial: dialer,
},
}

resp, err := c.Get(urlString)
if err != nil {
t.Error(err)
if res.StatusCode != 502 {
t.Errorf("expect 502; got %d", res.StatusCode)
}

if resp.StatusCode != 503 {
t.Errorf("expect 503; got %d", res.StatusCode)
}

body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if !strings.Contains(err.Error(), "connection reset by peer") {
t.Error(err)
}
body, err := io.ReadAll(res.Body)
res.Body.Close()

if !strings.Contains(string(body), "no such host") {
t.Errorf("Unexpected error: %v", err)
Expand Down Expand Up @@ -779,37 +755,21 @@ func TestFailedDial_HTTPCONN(t *testing.T) {
br := bufio.NewReader(conn)
res, err := http.ReadResponse(br, nil)
if err != nil {
t.Fatalf("reading HTTP response from CONNECT: %v", err)
}
if res.StatusCode != 200 {
t.Fatalf("expect 200; got %d", res.StatusCode)
t.Errorf("reading HTTP response from CONNECT: %v", err)
}

dialer := func(_, _ string) (net.Conn, error) {
return conn, nil
if res.StatusCode != 502 {
t.Errorf("expect 502; got %d", res.StatusCode)
}

c := &http.Client{
Transport: &http.Transport{
Dial: dialer,
},
}

resp, err := c.Get(server.URL)
body, err := io.ReadAll(res.Body)
res.Body.Close()
if err != nil {
t.Fatal(err)
}

body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err == nil {
t.Fatalf("Expected error reading response body; response=%q", body)
} else if !strings.Contains(err.Error(), "connection reset by peer") {
t.Error(err)
}

if !strings.Contains(string(body), "connection refused") {
t.Errorf("Unexpected error: %v", err)
t.Errorf("Expected 'connection refused' in error body, got: %s", string(body))
}

if err := ps.Metrics().ExpectServerDialFailure(metricsserver.DialFailureErrorResponse, 1); err != nil {
Expand Down