diff --git a/README.md b/README.md index 177f3915..d831cdd6 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,10 @@ Usage of mev-boost: use Sepolia -version only print version + -metrics + enables a metrics server (default: false) + -metrics-addr string + listening address for the metrics server (default: "localhost:18551") ``` ### `-relays` vs `-relay` @@ -309,6 +313,10 @@ Example for setting a minimum bid value of 0.06 ETH: -relay $YOUR_RELAY_CHOICE_C ``` +### Enabling metrics + +Optionally, the `-metrics` flag can be provided to expose a prometheus metrics server. The metrics server address/port can be changed with the `-metrics-addr` (e.g., `-metrics-addr localhost:9009`) flag. + --- # API diff --git a/cli/flags.go b/cli/flags.go index ab8760bd..f147c12b 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -7,6 +7,7 @@ const ( GenesisCategory = "GENESIS" RelayCategory = "RELAYS" GeneralCategory = "GENERAL" + Metrics = "METRICS" ) var flags = []cli.Flag{ @@ -35,6 +36,10 @@ var flags = []cli.Flag{ timeoutGetPayloadFlag, timeoutRegValFlag, maxRetriesFlag, + + // metrics + metricsFlag, + metricsAddrFlag, } var ( @@ -178,4 +183,19 @@ var ( Value: 5, Category: RelayCategory, } + + // metrics + metricsFlag = &cli.BoolFlag{ + Name: "metrics", + Sources: cli.EnvVars("METRICS_ENABLED"), + Usage: "enables a metrics server", + Category: Metrics, + } + metricsAddrFlag = &cli.StringFlag{ + Name: "metrics-addr", + Sources: cli.EnvVars("METRICS_ADDR"), + Value: "localhost:18551", + Usage: "listening address for the metrics server", + Category: Metrics, + } ) diff --git a/cli/main.go b/cli/main.go index 9de3664b..b7f7ef60 100644 --- a/cli/main.go +++ b/cli/main.go @@ -68,6 +68,8 @@ func start(_ context.Context, cmd *cli.Command) error { genesisForkVersion, genesisTime = setupGenesis(cmd) relays, minBid, relayCheck = setupRelays(cmd) listenAddr = cmd.String(addrFlag.Name) + metricsEnabled = cmd.Bool(metricsFlag.Name) + metricsAddr = cmd.String(metricsAddrFlag.Name) ) opts := server.BoostServiceOpts{ @@ -82,6 +84,7 @@ func start(_ context.Context, cmd *cli.Command) error { RequestTimeoutGetPayload: time.Duration(cmd.Int(timeoutGetPayloadFlag.Name)) * time.Millisecond, RequestTimeoutRegVal: time.Duration(cmd.Int(timeoutRegValFlag.Name)) * time.Millisecond, RequestMaxRetries: cmd.Int(maxRetriesFlag.Name), + MetricsAddr: metricsAddr, } service, err := server.NewBoostService(opts) if err != nil { @@ -92,6 +95,15 @@ func start(_ context.Context, cmd *cli.Command) error { log.Error("no relay passed the health-check!") } + if metricsEnabled { + go func() { + log.Infof("metrics server listening on %v", opts.MetricsAddr) + if err := service.StartMetricsServer(); err != nil { + log.WithError(err).Error("metrics server exited with error") + } + }() + } + log.Infof("listening on %v", listenAddr) return service.StartHTTPServer() } diff --git a/go.mod b/go.mod index c7a038a8..cdf8b56f 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/flashbots/mev-boost go 1.24.0 require ( + github.com/VictoriaMetrics/metrics v1.40.1 github.com/ethereum/go-ethereum v1.15.9 github.com/flashbots/go-boost-utils v1.9.0 github.com/flashbots/go-utils v0.10.0 @@ -30,6 +31,8 @@ require ( github.com/mmcloughlin/addchain v0.4.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/supranational/blst v0.3.14 // indirect + github.com/valyala/fastrand v1.1.0 // indirect + github.com/valyala/histogram v1.2.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect golang.org/x/sync v0.13.0 // indirect rsc.io/tmplfunc v0.0.3 // indirect diff --git a/go.sum b/go.sum index b48e3377..431e31a1 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/VictoriaMetrics/fastcache v1.12.2 h1:N0y9ASrJ0F6h0QaC3o6uJb3NIZ9VKLjCM7NQbSmF7WI= github.com/VictoriaMetrics/fastcache v1.12.2/go.mod h1:AmC+Nzz1+3G2eCPapF6UcsnkThDcMsQicp4xDukwJYI= +github.com/VictoriaMetrics/metrics v1.40.1 h1:FrF5uJRpIVj9fayWcn8xgiI+FYsKGMslzPuOXjdeyR4= +github.com/VictoriaMetrics/metrics v1.40.1/go.mod h1:XE4uudAAIRaJE614Tl5HMrtoEU6+GDZO4QTnNSsZRuA= github.com/attestantio/go-builder-client v0.7.2 h1:bOrtysEIZd9bEM+mAeT6OtAo6LSAft/qylBLwFoFwZ0= github.com/attestantio/go-builder-client v0.7.2/go.mod h1:+NADxbaknI5yxl+0mCkMa/VciVsesxRMGNP/poDfV08= github.com/attestantio/go-eth2-client v0.27.1 h1:g7bm+gG/p+gfzYdEuxuAepVWYb8EO+2KojV5/Lo2BxM= @@ -114,6 +116,10 @@ github.com/trailofbits/go-fuzz-utils v0.0.0-20240830175354-474de707d2aa h1:jXdW8 github.com/trailofbits/go-fuzz-utils v0.0.0-20240830175354-474de707d2aa/go.mod h1:/7KgvY5ghyUsjocUh9dMkLCwKtNxqe0kWl5SIdpLtO8= github.com/urfave/cli/v3 v3.2.0 h1:m8WIXY0U9LCuUl5r+0fqLWDhNYWt6qvlW+GcF4EoXf8= github.com/urfave/cli/v3 v3.2.0/go.mod h1:FJSKtM/9AiiTOJL4fJ6TbMUkxBXn7GO9guZqoZtpYpo= +github.com/valyala/fastrand v1.1.0 h1:f+5HkLW4rsgzdNoleUOB69hyT9IlD2ZQh9GyDMfb5G8= +github.com/valyala/fastrand v1.1.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ= +github.com/valyala/histogram v1.2.0 h1:wyYGAZZt3CpwUiIb9AU/Zbllg1llXyrtApRS815OLoQ= +github.com/valyala/histogram v1.2.0/go.mod h1:Hb4kBwb4UxsaNbbbh+RRz8ZR6pdodR57tzWUS3BUzXY= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= diff --git a/server/get_header.go b/server/get_header.go index ee1aec8c..c70d768f 100644 --- a/server/get_header.go +++ b/server/get_header.go @@ -19,6 +19,7 @@ import ( "github.com/attestantio/go-eth2-client/spec" "github.com/attestantio/go-eth2-client/spec/phase0" "github.com/flashbots/mev-boost/config" + "github.com/flashbots/mev-boost/server/params" "github.com/flashbots/mev-boost/server/types" "github.com/google/uuid" "github.com/sirupsen/logrus" @@ -94,13 +95,16 @@ func (m *BoostService) getHeader(log *logrus.Entry, slot phase0.Slot, pubkey, pa // Send the request log.Debug("requesting header") + start := time.Now() resp, err := m.httpClientGetHeader.Do(req) + RecordRelayLatency(params.PathGetHeader, relay.String(), float64(time.Since(start).Microseconds())) if err != nil { log.WithError(err).Warn("error calling getHeader on relay") return } defer resp.Body.Close() + RecordRelayStatusCode(strconv.Itoa(resp.StatusCode), params.PathGetHeader, relay.String()) // Check if no header is available if resp.StatusCode == http.StatusNoContent { log.Debug("no-content response") @@ -207,9 +211,15 @@ func (m *BoostService) getHeader(log *logrus.Entry, slot phase0.Slot, pubkey, pa log.Debug("bid received") + RecordRelayLastSlot(relay.String(), uint64(slot)) + + valueEthFloat64, _ := valueEth.Float64() + RecordBidValue(relay.String(), valueEthFloat64) + // Skip if value is lower than the minimum bid if bidInfo.value.CmpBig(m.relayMinBid.BigInt()) == -1 { log.Debug("ignoring bid below min-bid value") + IncrementBidBelowMinBid(relay.String()) return } @@ -248,13 +258,18 @@ func (m *BoostService) getHeader(log *logrus.Entry, slot phase0.Slot, pubkey, pa log.Debug("new best bid") result.response = *bid result.bidInfo = bidInfo + result.t = time.Now() }(relay) } wg.Wait() - // Set the winning relays before returning result.relays = relays[BlockHashHex(result.bidInfo.blockHash.String())] + + if len(result.relays) > 0 { + RecordWinningBidValue(result.bidInfo.value.Float64()) + } + return result, nil } diff --git a/server/get_payload.go b/server/get_payload.go index eba025d0..b298a65b 100644 --- a/server/get_payload.go +++ b/server/get_payload.go @@ -10,6 +10,7 @@ import ( "mime" "net/http" "slices" + "strconv" "sync/atomic" "time" @@ -212,20 +213,23 @@ func (m *BoostService) innerGetPayload(log *logrus.Entry, signedBlindedBeaconBlo req.Header.Set(HeaderDateMilliseconds, fmt.Sprintf("%d", time.Now().UTC().UnixMilli())) req.Header.Set(HeaderUserAgent, userAgent) - // Send the request + statusCode := http.StatusOK + endpoint := params.PathGetPayload + if version == GetPayloadV2 { + statusCode = http.StatusAccepted + endpoint = params.PathGetPayloadV2 + } + // Send the request and record latency log.Debug("submitting signed blinded block") + start := time.Now() resp, err := m.httpClientGetPayload.Do(req) + RecordRelayLatency(endpoint, relay.String(), float64(time.Since(start).Microseconds())) if err != nil { log.WithError(err).Warnf("error calling getPayload%s on relay", version) return nil, err } - var statusCode int - if version == GetPayloadV1 { - statusCode = http.StatusOK - } else { - statusCode = http.StatusAccepted - } + RecordRelayStatusCode(strconv.Itoa(statusCode), endpoint, relay.String()) // Check that the response was successful if resp.StatusCode != statusCode { err = fmt.Errorf("%w: %d", errHTTPErrorResponse, resp.StatusCode) diff --git a/server/metrics.go b/server/metrics.go new file mode 100644 index 00000000..fb6df1cf --- /dev/null +++ b/server/metrics.go @@ -0,0 +1,58 @@ +package server + +import ( + "fmt" + + "github.com/VictoriaMetrics/metrics" +) + +var winningBidValue = metrics.NewHistogram("mev_boost_winning_bid_value") + +const ( + beaconNodeStatusLabel = `mev_boost_beacon_node_status_code_total{http_status_code="%s",endpoint="%s"}` + bidValuesLabel = `mev_boost_bid_values{relay="%s"}` + bidsBelowMinBidLabel = `mev_boost_bids_below_min_bid_total{relay="%s"}` + relayLatencyLabel = `mev_boost_relay_latency{endpoint="%s",relay="%s"}` + relayStatusCodeLabel = `mev_boost_relay_status_code_total{http_status_code="%s",endpoint="%s",relay="%s"}` + relayLastSlotLabel = `mev_boost_relay_last_slot{relay="%s"}` + msIntoSlotLabel = `mev_boost_millisec_into_slot{endpoint="%s"}` +) + +func IncrementBeaconNodeStatus(status, endpoint string) { + l := fmt.Sprintf(beaconNodeStatusLabel, status, endpoint) + metrics.GetOrCreateCounter(l).Inc() +} + +func RecordBidValue(relay string, value float64) { + l := fmt.Sprintf(bidValuesLabel, relay) + metrics.GetOrCreateHistogram(l).Update(value) +} + +func IncrementBidBelowMinBid(relay string) { + l := fmt.Sprintf(bidsBelowMinBidLabel, relay) + metrics.GetOrCreateCounter(l).Inc() +} + +func RecordWinningBidValue(value float64) { + winningBidValue.Update(value) +} + +func RecordRelayLatency(endpoint, relay string, latency float64) { + l := fmt.Sprintf(relayLatencyLabel, endpoint, relay) + metrics.GetOrCreateHistogram(l).Update(latency) +} + +func RecordRelayStatusCode(httpStatus, endpoint, relay string) { + l := fmt.Sprintf(relayStatusCodeLabel, httpStatus, endpoint, relay) + metrics.GetOrCreateCounter(l).Inc() +} + +func RecordRelayLastSlot(relay string, slot uint64) { + l := fmt.Sprintf(relayLastSlotLabel, relay) + metrics.GetOrCreateGauge(l, nil).Set(float64(slot)) +} + +func RecordMsIntoSlot(endpoint string, ms float64) { + l := fmt.Sprintf(msIntoSlotLabel, endpoint) + metrics.GetOrCreateHistogram(l).Update(ms) +} diff --git a/server/register_validator.go b/server/register_validator.go index 08bae69c..cb31fd10 100644 --- a/server/register_validator.go +++ b/server/register_validator.go @@ -5,6 +5,8 @@ import ( "context" "fmt" "net/http" + "strconv" + "time" "github.com/flashbots/mev-boost/server/params" "github.com/flashbots/mev-boost/server/types" @@ -45,7 +47,9 @@ func (m *BoostService) registerValidator(log *logrus.Entry, regBytes []byte, hea }).Debug("sending the registerValidator request") // Send the request + start := time.Now() resp, err := m.httpClientRegVal.Do(req) + RecordRelayLatency(params.PathRegisterValidator, relay.String(), float64(time.Since(start).Microseconds())) if err != nil { log.WithError(err).Warn("error calling registerValidator on relay") respErrCh <- err @@ -53,6 +57,7 @@ func (m *BoostService) registerValidator(log *logrus.Entry, regBytes []byte, hea } resp.Body.Close() + RecordRelayStatusCode(strconv.Itoa(resp.StatusCode), params.PathRegisterValidator, relay.String()) // Check if response is successful if resp.StatusCode == http.StatusOK { log.Debug("relay accepted registrations") diff --git a/server/service.go b/server/service.go index 60e2a6db..172535bd 100644 --- a/server/service.go +++ b/server/service.go @@ -13,6 +13,7 @@ import ( "sync/atomic" "time" + "github.com/VictoriaMetrics/metrics" "github.com/attestantio/go-eth2-client/spec/phase0" "github.com/flashbots/go-boost-utils/ssz" "github.com/flashbots/go-utils/httplogger" @@ -63,6 +64,8 @@ type BoostServiceOpts struct { RequestTimeoutGetPayload time.Duration RequestTimeoutRegVal time.Duration RequestMaxRetries int + + MetricsAddr string } // BoostService - the mev-boost service @@ -86,6 +89,8 @@ type BoostService struct { slotUID *slotUID slotUIDLock sync.Mutex + + metricsAddr string } // NewBoostService created a new BoostService @@ -108,6 +113,7 @@ func NewBoostService(opts BoostServiceOpts) (*BoostService, error) { genesisTime: opts.GenesisTime, bids: make(map[string]bidResp), slotUID: &slotUID{}, + metricsAddr: opts.MetricsAddr, builderSigningDomain: builderSigningDomain, httpClientGetHeader: http.Client{ @@ -187,6 +193,15 @@ func (m *BoostService) StartHTTPServer() error { return err } +// StartMetricsServer starts the HTTP server for exporting metrics +func (m *BoostService) StartMetricsServer() error { + serveMux := http.NewServeMux() + serveMux.HandleFunc("/metrics", func(w http.ResponseWriter, _ *http.Request) { + metrics.WritePrometheus(w, true) + }) + return http.ListenAndServe(m.metricsAddr, serveMux) +} + func (m *BoostService) startBidCacheCleanupTask() { for { time.Sleep(1 * time.Minute) @@ -209,8 +224,10 @@ func (m *BoostService) handleRoot(w http.ResponseWriter, _ *http.Request) { func (m *BoostService) handleStatus(w http.ResponseWriter, _ *http.Request) { w.Header().Set(HeaderKeyVersion, config.Version) if !m.relayCheck || m.CheckRelays() > 0 { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusOK), params.PathStatus) m.respondOK(w, nilResponse) } else { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusServiceUnavailable), params.PathStatus) m.respondError(w, http.StatusServiceUnavailable, "all relays are unavailable") } } @@ -234,6 +251,7 @@ func (m *BoostService) handleRegisterValidator(w http.ResponseWriter, req *http. // Read the validator registrations regBytes, err := io.ReadAll(req.Body) if err != nil { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusInternalServerError), params.PathRegisterValidator) m.respondError(w, http.StatusInternalServerError, err.Error()) return } @@ -243,9 +261,11 @@ func (m *BoostService) handleRegisterValidator(w http.ResponseWriter, req *http. err = m.registerValidator(log, regBytes, header) if err == nil { // One of the relays responded OK + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusOK), params.PathRegisterValidator) m.respondOK(w, nilResponse) } else { // None of the relays responded OK + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusBadGateway), params.PathRegisterValidator) m.respondError(w, http.StatusBadGateway, err.Error()) } } @@ -265,6 +285,7 @@ func (m *BoostService) handleGetHeader(w http.ResponseWriter, req *http.Request) // Parse the slot slotValue, err := strconv.ParseUint(vars["slot"], 10, 64) if err != nil { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusBadRequest), params.PathGetHeader) m.respondError(w, http.StatusBadRequest, errInvalidSlot.Error()) return } @@ -284,6 +305,7 @@ func (m *BoostService) handleGetHeader(w http.ResponseWriter, req *http.Request) // Query the relays for the header result, err := m.getHeader(log, slot, pubkey, parentHashHex, ua, rawProposerAcceptContentTypes) if err != nil { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusBadRequest), params.PathGetHeader) m.respondError(w, http.StatusBadRequest, err.Error()) return } @@ -291,6 +313,7 @@ func (m *BoostService) handleGetHeader(w http.ResponseWriter, req *http.Request) // Bail if none of the relays returned a bid if result.response.IsEmpty() { log.Info("no bid received") + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusNoContent), params.PathGetHeader) w.WriteHeader(http.StatusNoContent) return } @@ -326,12 +349,16 @@ func (m *BoostService) handleGetHeader(w http.ResponseWriter, req *http.Request) // Respond appropriately switch proposerPreferredContentType { case MediaTypeJSON: + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusOK), params.PathGetHeader) log.Debug("responding with JSON") m.respondGetHeaderJSON(w, &result) + case MediaTypeOctetStream: + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusOK), params.PathGetHeader) log.Debug("responding with SSZ") m.respondGetHeaderSSZ(w, &result) default: + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusNotAcceptable), params.PathGetHeader) message := fmt.Sprintf("unsupported media type: %s", proposerPreferredContentType) log.Error(message) m.respondError(w, http.StatusNotAcceptable, message) @@ -362,6 +389,7 @@ func (m *BoostService) handleGetPayload(w http.ResponseWriter, req *http.Request // Read the body first, so we can log it later on error signedBlindedBlockBytes, err := io.ReadAll(req.Body) if err != nil { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusBadRequest), params.PathGetPayload) log.WithError(err).Error("could not read body of request from the beacon node") m.respondError(w, http.StatusBadRequest, err.Error()) return @@ -372,6 +400,7 @@ func (m *BoostService) handleGetPayload(w http.ResponseWriter, req *http.Request // If no payload has been received from relay, log loudly about withholding! if result == nil || getPayloadResponseIsEmpty(result) { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusBadGateway), params.PathGetPayload) originRelays := types.RelayEntriesToStrings(originalBid.relays) log.WithField("relaysWithBid", strings.Join(originRelays, ", ")).Error("no payload received from relay!") m.respondError(w, http.StatusBadGateway, errNoSuccessfulRelayResponse.Error()) @@ -394,12 +423,15 @@ func (m *BoostService) handleGetPayload(w http.ResponseWriter, req *http.Request // Respond appropriately switch proposerPreferredContentType { case MediaTypeJSON: + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusOK), params.PathGetPayload) log.Debug("responding with JSON") m.respondGetPayloadJSON(w, result) case MediaTypeOctetStream: + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusOK), params.PathGetPayload) log.Debug("responding with SSZ") m.respondGetPayloadSSZ(w, result) default: + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusNotAcceptable), params.PathGetPayload) message := fmt.Sprintf("unsupported media type: %s", proposerPreferredContentType) log.Error(message) m.respondError(w, http.StatusNotAcceptable, message) @@ -427,6 +459,7 @@ func (m *BoostService) handleGetPayloadV2(w http.ResponseWriter, req *http.Reque // Read the body first, so we can log it later on error signedBlindedBlockBytes, err := io.ReadAll(req.Body) if err != nil { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusBadRequest), params.PathGetPayloadV2) log.WithError(err).Error("could not read body of request from the beacon node") m.respondError(w, http.StatusBadRequest, err.Error()) return @@ -437,12 +470,14 @@ func (m *BoostService) handleGetPayloadV2(w http.ResponseWriter, req *http.Reque // If no relay accepted the submission, log about the failure if !success { + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusBadGateway), params.PathGetPayloadV2) originRelays := types.RelayEntriesToStrings(originalBid.relays) log.WithField("relaysWithBid", strings.Join(originRelays, ", ")).Error("no relay accepted the signed blinded beacon block submission!") m.respondError(w, http.StatusBadGateway, errNoSuccessfulRelayResponse.Error()) return } + IncrementBeaconNodeStatus(strconv.Itoa(http.StatusAccepted), params.PathGetPayloadV2) log.Info("successfully submitted signed blinded beacon block to relay") w.WriteHeader(http.StatusAccepted) } @@ -461,11 +496,14 @@ func (m *BoostService) CheckRelays() int { log := m.log.WithField("url", url) log.Debug("checking relay status") + start := time.Now() code, err := SendHTTPRequest(context.Background(), m.httpClientGetHeader, http.MethodGet, url, "", nil, nil, nil) + RecordRelayLatency(params.PathStatus, relay.String(), float64(time.Since(start).Microseconds())) if err != nil { log.WithError(err).Error("relay status error - request failed") return } + RecordRelayStatusCode(strconv.Itoa(code), params.PathStatus, relay.String()) if code == http.StatusOK { log.Debug("relay status OK") } else {