From 098985ded01654b290988646d932db3d3ab14500 Mon Sep 17 00:00:00 2001 From: Robert Wu Date: Sun, 14 Sep 2025 23:06:01 -0400 Subject: [PATCH 1/5] improve error handling in prometheus exporter --- CHANGELOG.md | 1 + exporters/prometheus/exporter.go | 40 ++- exporters/prometheus/exporter_test.go | 356 +++++++++++++++++++++----- 3 files changed, 315 insertions(+), 82 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95737f41795..01c95cc0325 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/trace` synchronously de-duplicates the passed attributes instead of delegating it to the returned `TracerOption`. (#7266) - `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/meter` synchronously de-duplicates the passed attributes instead of delegating it to the returned `MeterOption`. (#7266) - `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/log` synchronously de-duplicates the passed attributes instead of delegating it to the returned `LoggerOption`. (#7266) +- Improve error handling of dropping data during translation by using `NewInvalidMetric` in `go.opentelemetry.io/otel/exporters/prometheus`. (#7363) diff --git a/exporters/prometheus/exporter.go b/exporters/prometheus/exporter.go index 0f29c0abbde..1b5ba9e7e4f 100644 --- a/exporters/prometheus/exporter.go +++ b/exporters/prometheus/exporter.go @@ -215,7 +215,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { attrKeys, attrVals, err := getAttrs(scopeMetrics.Scope.Attributes, c.labelNamer) if err != nil { - otel.Handle(err) + reportError(ch, nil, err) continue } for i := range attrKeys { @@ -231,18 +231,20 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { for _, m := range scopeMetrics.Metrics { typ := c.metricType(m) if typ == nil { + reportError(ch, nil, errors.New("invalid metric type")) continue } name, err := c.getName(m) if err != nil { // TODO(#7066): Handle this error better. It's not clear this can be // reached, bad metric names should / will be caught at creation time. - otel.Handle(err) + reportError(ch, nil, err) continue } drop, help := c.validateMetrics(name, m.Description, typ) if drop { + reportError(ch, nil, errors.New("invalid metric")) continue } @@ -336,7 +338,7 @@ func addExponentialHistogramMetric[N int64 | float64]( for _, dp := range histogram.DataPoints { keys, values, err := getAttrs(dp.Attributes, labelNamer) if err != nil { - otel.Handle(err) + reportError(ch, nil, err) continue } keys = append(keys, kv.keys...) @@ -348,9 +350,14 @@ func addExponentialHistogramMetric[N int64 | float64]( scale := dp.Scale if scale < -4 { // Reject scales below -4 as they cannot be represented in Prometheus - otel.Handle(fmt.Errorf( - "exponential histogram scale %d is below minimum supported scale -4, skipping data point", - scale)) + reportError( + ch, + desc, + fmt.Errorf( + "exponential histogram scale %d is below minimum supported scale -4, skipping data point", + scale, + ), + ) continue } @@ -395,7 +402,7 @@ func addExponentialHistogramMetric[N int64 | float64]( dp.StartTime, values...) if err != nil { - otel.Handle(err) + reportError(ch, desc, err) continue } m = addExemplars(m, dp.Exemplars, labelNamer) @@ -414,7 +421,7 @@ func addHistogramMetric[N int64 | float64]( for _, dp := range histogram.DataPoints { keys, values, err := getAttrs(dp.Attributes, labelNamer) if err != nil { - otel.Handle(err) + reportError(ch, nil, err) continue } keys = append(keys, kv.keys...) @@ -430,7 +437,7 @@ func addHistogramMetric[N int64 | float64]( } m, err := prometheus.NewConstHistogram(desc, dp.Count, float64(dp.Sum), buckets, values...) if err != nil { - otel.Handle(err) + reportError(ch, desc, err) continue } m = addExemplars(m, dp.Exemplars, labelNamer) @@ -454,7 +461,7 @@ func addSumMetric[N int64 | float64]( for _, dp := range sum.DataPoints { keys, values, err := getAttrs(dp.Attributes, labelNamer) if err != nil { - otel.Handle(err) + reportError(ch, nil, err) continue } keys = append(keys, kv.keys...) @@ -463,7 +470,7 @@ func addSumMetric[N int64 | float64]( desc := prometheus.NewDesc(name, m.Description, keys, nil) m, err := prometheus.NewConstMetric(desc, valueType, float64(dp.Value), values...) if err != nil { - otel.Handle(err) + reportError(ch, desc, err) continue } // GaugeValues don't support Exemplars at this time @@ -486,7 +493,7 @@ func addGaugeMetric[N int64 | float64]( for _, dp := range gauge.DataPoints { keys, values, err := getAttrs(dp.Attributes, labelNamer) if err != nil { - otel.Handle(err) + reportError(ch, nil, err) continue } keys = append(keys, kv.keys...) @@ -495,7 +502,7 @@ func addGaugeMetric[N int64 | float64]( desc := prometheus.NewDesc(name, m.Description, keys, nil) m, err := prometheus.NewConstMetric(desc, prometheus.GaugeValue, float64(dp.Value), values...) if err != nil { - otel.Handle(err) + reportError(ch, desc, err) continue } ch <- m @@ -713,3 +720,10 @@ func attributesToLabels(attrs []attribute.KeyValue, labelNamer otlptranslator.La } return labels, nil } + +func reportError(ch chan<- prometheus.Metric, desc *prometheus.Desc, err error) { + if desc == nil { + desc = prometheus.NewInvalidDesc(err) + } + ch <- prometheus.NewInvalidMetric(desc, err) +} diff --git a/exporters/prometheus/exporter_test.go b/exporters/prometheus/exporter_test.go index 08f5c5a46d9..4c9c924c653 100644 --- a/exporters/prometheus/exporter_test.go +++ b/exporters/prometheus/exporter_test.go @@ -30,6 +30,11 @@ import ( "go.opentelemetry.io/otel/trace" ) +// producerFunc adapts a function to implement metric.Producer. +type producerFunc func(context.Context) ([]metricdata.ScopeMetrics, error) + +func (f producerFunc) Produce(ctx context.Context) ([]metricdata.ScopeMetrics, error) { return f(ctx) } + func TestPrometheusExporter(t *testing.T) { testCases := []struct { name string @@ -760,6 +765,7 @@ func TestDuplicateMetrics(t *testing.T) { recordMetrics func(ctx context.Context, meterA, meterB otelmetric.Meter) options []Option possibleExpectedFiles []string + expectGatherError bool }{ { name: "no_conflict_two_counters", @@ -946,6 +952,7 @@ func TestDuplicateMetrics(t *testing.T) { "testdata/conflict_type_counter_and_updowncounter_1.txt", "testdata/conflict_type_counter_and_updowncounter_2.txt", }, + expectGatherError: true, }, { name: "conflict_type_histogram_and_updowncounter", @@ -966,6 +973,7 @@ func TestDuplicateMetrics(t *testing.T) { "testdata/conflict_type_histogram_and_updowncounter_1.txt", "testdata/conflict_type_histogram_and_updowncounter_2.txt", }, + expectGatherError: true, }, } @@ -1006,19 +1014,26 @@ func TestDuplicateMetrics(t *testing.T) { tc.recordMetrics(ctx, meterA, meterB) - match := false - for _, filename := range tc.possibleExpectedFiles { - file, ferr := os.Open(filename) - require.NoError(t, ferr) - t.Cleanup(func() { require.NoError(t, file.Close()) }) - - err = testutil.GatherAndCompare(registry, file) - if err == nil { - match = true - break + if tc.expectGatherError { + // With improved error handling, conflicting instrument types emit an invalid metric. + // Gathering should surface an error instead of silently dropping. + _, err := registry.Gather() + require.Error(t, err) + } else { + match := false + for _, filename := range tc.possibleExpectedFiles { + file, ferr := os.Open(filename) + require.NoError(t, ferr) + t.Cleanup(func() { require.NoError(t, file.Close()) }) + + err = testutil.GatherAndCompare(registry, file) + if err == nil { + match = true + break + } } + require.Truef(t, match, "expected export not produced: %v", err) } - require.Truef(t, match, "expected export not produced: %v", err) }) } } @@ -1350,14 +1365,19 @@ func TestExponentialHistogramScaleValidation(t *testing.T) { keyVals{}, otlptranslator.LabelNamer{}, ) - assert.Error(t, capturedError) - assert.Contains(t, capturedError.Error(), "scale -5 is below minimum") + // Expect an invalid metric to be sent that carries the scale error. + var pm prometheus.Metric select { - case <-ch: - t.Error("Expected no metrics to be produced for invalid scale") + case pm = <-ch: default: - // No metrics were produced for the invalid scale + t.Fatalf("expected an invalid metric to be emitted for invalid scale, but channel was empty") } + var dtoMetric dto.Metric + werr := pm.Write(&dtoMetric) + require.Error(t, werr) + assert.Contains(t, werr.Error(), "below minimum supported scale -4") + // The exporter reports via invalid metric, not the global otel error handler. + assert.NoError(t, capturedError) }) } @@ -1748,16 +1768,113 @@ func TestDownscaleExponentialBucketEdgeCases(t *testing.T) { // TestEscapingErrorHandling increases test coverage by exercising some error // conditions. func TestEscapingErrorHandling(t *testing.T) { + // Helper to create a producer that emits a Summary (unsupported) metric. + makeSummaryProducer := func() metric.Producer { + return producerFunc(func(_ context.Context) ([]metricdata.ScopeMetrics, error) { + return []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "summary_metric", + Description: "unsupported summary", + Data: metricdata.Summary{}, + }, + }, + }, + }, nil + }) + } + // Helper to create a producer that emits a metric with an invalid name, to + // force getName() to fail and exercise reportError at that branch. + makeBadNameProducer := func() metric.Producer { + return producerFunc(func(_ context.Context) ([]metricdata.ScopeMetrics, error) { + return []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "$%^&", // intentionally invalid; translation should fail normalization + Description: "bad name for translation", + // Any supported type is fine; getName runs before add* functions. + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + {Value: 1}, + }, + }, + }, + }, + }, + }, nil + }) + } + // Helper to create a producer that emits an ExponentialHistogram with a bad + // label, to exercise addExponentialHistogramMetric getAttrs error path. + makeBadEHProducer := func() metric.Producer { + return producerFunc(func(_ context.Context) ([]metricdata.ScopeMetrics, error) { + return []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "exp_hist_metric", + Description: "bad label", + Data: metricdata.ExponentialHistogram[float64]{ + DataPoints: []metricdata.ExponentialHistogramDataPoint[float64]{ + { + Attributes: attribute.NewSet(attribute.Key("$%^&").String("B")), + Scale: 0, + Count: 1, + ZeroThreshold: 0, + }, + }, + }, + }, + }, + }, + }, nil + }) + } + // Helper to create a producer that emits an ExponentialHistogram with + // inconsistent bucket counts vs total Count to trigger constructor error in addExponentialHistogramMetric. + makeBadEHCountProducer := func() metric.Producer { + return producerFunc(func(_ context.Context) ([]metricdata.ScopeMetrics, error) { + return []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "exp_hist_metric_bad", + Data: metricdata.ExponentialHistogram[float64]{ + DataPoints: []metricdata.ExponentialHistogramDataPoint[float64]{ + { + Scale: 0, + Count: 0, + ZeroThreshold: 0, + PositiveBucket: metricdata.ExponentialBucket{ + Offset: 0, + Counts: []uint64{1}, + }, + }, + }, + }, + }, + }, + }, + }, nil + }) + } + testCases := []struct { - name string - namespace string - counterName string - customScopeAttrs []attribute.KeyValue - customResourceAttrs []attribute.KeyValue - labelName string - expectNewErr string - expectMetricErr string - checkMetricFamilies func(t testing.TB, dtos []*dto.MetricFamily) + name string + namespace string + counterName string + customScopeAttrs []attribute.KeyValue + customResourceAttrs []attribute.KeyValue + labelName string + producer metric.Producer + skipInstrument bool + record func(ctx context.Context, meter otelmetric.Meter) error + expectNewErr string + expectMetricErr string + expectGatherErrContains string + checkMetricFamilies func(t testing.TB, dtos []*dto.MetricFamily) }{ { name: "simple happy path", @@ -1778,6 +1895,14 @@ func TestEscapingErrorHandling(t *testing.T) { counterName: "foo", expectNewErr: `normalization for label name "$%^&" resulted in invalid name "____"`, }, + { + name: "bad translated metric name via producer", + // Use a producer to emit a metric with an invalid name to trigger getName error. + producer: makeBadNameProducer(), + skipInstrument: true, + // Error message comes from normalization in the translator; match on a stable substring. + expectGatherErrContains: "normalization", + }, { name: "good namespace, names should be escaped", namespace: "my-strange-namespace", @@ -1809,10 +1934,9 @@ func TestEscapingErrorHandling(t *testing.T) { customScopeAttrs: []attribute.KeyValue{ attribute.Key("$%^&").String("B"), }, - checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { - require.Len(t, mfs, 1) - require.Equal(t, "target_info", mfs[0].GetName()) - }, + // With improved error handling, invalid scope label names result in an invalid metric + // and Gather returns an error containing the normalization failure. + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, }, { name: "bad translated metric name", @@ -1821,15 +1945,92 @@ func TestEscapingErrorHandling(t *testing.T) { }, { // label names are not translated and therefore not checked until - // collection time, and there is no place to catch and return this error. - // Instead we drop the metric. - name: "bad translated label name", - counterName: "foo", - labelName: "$%^&", - checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { - require.Len(t, mfs, 1) - require.Equal(t, "target_info", mfs[0].GetName()) + // collection time; with improved error handling, we emit an invalid metric and + // surface the error during Gather. + name: "bad translated label name", + counterName: "foo", + labelName: "$%^&", + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + }, + { + name: "unsupported data type via producer", + // Use a producer to emit a Summary data point; no SDK instruments. + producer: makeSummaryProducer(), + skipInstrument: true, + expectGatherErrContains: "invalid metric type", + }, + { + name: "bad exponential histogram label name via producer", + producer: makeBadEHProducer(), + skipInstrument: true, + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + }, + { + name: "exponential histogram constructor error via producer (count mismatch)", + producer: makeBadEHCountProducer(), + skipInstrument: true, + expectGatherErrContains: "count", + }, + { + name: "sum constructor error via duplicate label name", + record: func(ctx context.Context, meter otelmetric.Meter) error { + c, err := meter.Int64Counter("sum_metric_dup") + if err != nil { + return err + } + // Duplicate variable label name with scope label to make Desc invalid. + c.Add(ctx, 1, otelmetric.WithAttributes(attribute.String(scopeNameLabel, "x"))) + return nil }, + expectGatherErrContains: "duplicate label", + }, + { + name: "gauge constructor error via duplicate label name", + record: func(ctx context.Context, meter otelmetric.Meter) error { + g, err := meter.Float64Gauge("gauge_metric_dup") + if err != nil { + return err + } + g.Record(ctx, 1.0, otelmetric.WithAttributes(attribute.String(scopeNameLabel, "x"))) + return nil + }, + expectGatherErrContains: "duplicate label", + }, + { + name: "histogram constructor error via duplicate label name", + record: func(ctx context.Context, meter otelmetric.Meter) error { + h, err := meter.Float64Histogram("hist_metric_dup") + if err != nil { + return err + } + h.Record(ctx, 1.23, otelmetric.WithAttributes(attribute.String(scopeNameLabel, "x"))) + return nil + }, + expectGatherErrContains: "duplicate label", + }, + { + name: "bad gauge label name", + record: func(ctx context.Context, meter otelmetric.Meter) error { + g, err := meter.Float64Gauge("gauge_metric") + if err != nil { + return err + } + g.Record(ctx, 1, otelmetric.WithAttributes(attribute.Key("$%^&").String("B"))) + return nil + }, + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + }, + { + name: "bad histogram label name", + record: func(ctx context.Context, meter otelmetric.Meter) error { + h, err := meter.Float64Histogram("hist_metric") + if err != nil { + return err + } + h.Record(ctx, 1.23, otelmetric.WithAttributes(attribute.Key("$%^&").String("B"))) + return nil + }, + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, }, } @@ -1845,49 +2046,66 @@ func TestEscapingErrorHandling(t *testing.T) { }) ctx = trace.ContextWithSpanContext(ctx, sc) - exporter, err := New( + opts := []Option{ WithRegisterer(registry), WithTranslationStrategy(otlptranslator.UnderscoreEscapingWithSuffixes), WithNamespace(tc.namespace), WithResourceAsConstantLabels(attribute.NewDenyKeysFilter()), - ) + } + if tc.producer != nil { + opts = append(opts, WithProducer(tc.producer)) + } + exporter, err := New(opts...) if tc.expectNewErr != "" { require.ErrorContains(t, err, tc.expectNewErr) return } require.NoError(t, err) - - res, err := resource.New(ctx, - resource.WithAttributes(semconv.ServiceName("prometheus_test")), - resource.WithAttributes(semconv.TelemetrySDKVersion("latest")), - resource.WithAttributes(tc.customResourceAttrs...), - ) - require.NoError(t, err) - provider := metric.NewMeterProvider( - metric.WithReader(exporter), - metric.WithResource(res), - ) - - fooCounter, err := provider.Meter( - "meterfoo", - otelmetric.WithInstrumentationVersion("v0.1.0"), - otelmetric.WithInstrumentationAttributes(tc.customScopeAttrs...), - ). - Int64Counter( - tc.counterName, - otelmetric.WithUnit("s"), - otelmetric.WithDescription(fmt.Sprintf(`meter %q counter`, tc.counterName))) - if tc.expectMetricErr != "" { - require.ErrorContains(t, err, tc.expectMetricErr) - return - } - require.NoError(t, err) - var opts []otelmetric.AddOption - if tc.labelName != "" { - opts = append(opts, otelmetric.WithAttributes(attribute.String(tc.labelName, "foo"))) + if !tc.skipInstrument { + res, err := resource.New(ctx, + resource.WithAttributes(semconv.ServiceName("prometheus_test")), + resource.WithAttributes(semconv.TelemetrySDKVersion("latest")), + resource.WithAttributes(tc.customResourceAttrs...), + ) + require.NoError(t, err) + provider := metric.NewMeterProvider( + metric.WithReader(exporter), + metric.WithResource(res), + ) + meter := provider.Meter( + "meterfoo", + otelmetric.WithInstrumentationVersion("v0.1.0"), + otelmetric.WithInstrumentationAttributes(tc.customScopeAttrs...), + ) + if tc.record != nil { + err := tc.record(ctx, meter) + require.NoError(t, err) + } else { + fooCounter, err := meter.Int64Counter( + tc.counterName, + otelmetric.WithUnit("s"), + otelmetric.WithDescription(fmt.Sprintf(`meter %q counter`, tc.counterName))) + if tc.expectMetricErr != "" { + require.ErrorContains(t, err, tc.expectMetricErr) + return + } + require.NoError(t, err) + var addOpts []otelmetric.AddOption + if tc.labelName != "" { + addOpts = append(addOpts, otelmetric.WithAttributes(attribute.String(tc.labelName, "foo"))) + } + fooCounter.Add(ctx, 100, addOpts...) + } + } else { + // When skipping instruments, still register the reader so Collect will run. + _ = metric.NewMeterProvider(metric.WithReader(exporter)) } - fooCounter.Add(ctx, 100, opts...) got, err := registry.Gather() + if tc.expectGatherErrContains != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tc.expectGatherErrContains) + return + } require.NoError(t, err) if tc.checkMetricFamilies != nil { tc.checkMetricFamilies(t, got) From fb9840066362440a22a23329802ff15f0d0b3f3a Mon Sep 17 00:00:00 2001 From: Robert Wu Date: Thu, 25 Sep 2025 16:51:57 -0400 Subject: [PATCH 2/5] create error variables --- exporters/prometheus/errors.go | 14 +++ exporters/prometheus/exporter.go | 11 +- exporters/prometheus/exporter_test.go | 144 +++++++++++++++++++++++++- 3 files changed, 160 insertions(+), 9 deletions(-) create mode 100644 exporters/prometheus/errors.go diff --git a/exporters/prometheus/errors.go b/exporters/prometheus/errors.go new file mode 100644 index 00000000000..3bd3354d07d --- /dev/null +++ b/exporters/prometheus/errors.go @@ -0,0 +1,14 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package prometheus // import "go.opentelemetry.io/otel/exporters/prometheus" + +import "errors" + +// Internal sentinel errors for consistent error checks in tests. +// These are unexported to avoid growing public API surface. +var ( + errInvalidMetricType = errors.New("invalid metric type") + errInvalidMetric = errors.New("invalid metric") + errEHScaleBelowMin = errors.New("exponential histogram scale below minimum supported") +) diff --git a/exporters/prometheus/exporter.go b/exporters/prometheus/exporter.go index 1b5ba9e7e4f..7fd80818359 100644 --- a/exporters/prometheus/exporter.go +++ b/exporters/prometheus/exporter.go @@ -231,20 +231,18 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { for _, m := range scopeMetrics.Metrics { typ := c.metricType(m) if typ == nil { - reportError(ch, nil, errors.New("invalid metric type")) + reportError(ch, nil, errInvalidMetricType) continue } name, err := c.getName(m) if err != nil { - // TODO(#7066): Handle this error better. It's not clear this can be - // reached, bad metric names should / will be caught at creation time. reportError(ch, nil, err) continue } drop, help := c.validateMetrics(name, m.Description, typ) if drop { - reportError(ch, nil, errors.New("invalid metric")) + reportError(ch, nil, errInvalidMetric) continue } @@ -353,10 +351,7 @@ func addExponentialHistogramMetric[N int64 | float64]( reportError( ch, desc, - fmt.Errorf( - "exponential histogram scale %d is below minimum supported scale -4, skipping data point", - scale, - ), + fmt.Errorf("%w: %d (min -4)", errEHScaleBelowMin, scale), ) continue } diff --git a/exporters/prometheus/exporter_test.go b/exporters/prometheus/exporter_test.go index 4c9c924c653..bd782912a30 100644 --- a/exporters/prometheus/exporter_test.go +++ b/exporters/prometheus/exporter_test.go @@ -1375,7 +1375,7 @@ func TestExponentialHistogramScaleValidation(t *testing.T) { var dtoMetric dto.Metric werr := pm.Write(&dtoMetric) require.Error(t, werr) - assert.Contains(t, werr.Error(), "below minimum supported scale -4") + assert.ErrorIs(t, werr, errEHScaleBelowMin) // The exporter reports via invalid metric, not the global otel error handler. assert.NoError(t, capturedError) }) @@ -1902,6 +1902,21 @@ func TestEscapingErrorHandling(t *testing.T) { skipInstrument: true, // Error message comes from normalization in the translator; match on a stable substring. expectGatherErrContains: "normalization", + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + // target_info should still be exported. + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "good namespace, names should be escaped", @@ -1937,6 +1952,21 @@ func TestEscapingErrorHandling(t *testing.T) { // With improved error handling, invalid scope label names result in an invalid metric // and Gather returns an error containing the normalization failure. expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + // target_info should still be exported; metric with bad scope label dropped. + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "bad translated metric name", @@ -1958,18 +1988,60 @@ func TestEscapingErrorHandling(t *testing.T) { producer: makeSummaryProducer(), skipInstrument: true, expectGatherErrContains: "invalid metric type", + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "bad exponential histogram label name via producer", producer: makeBadEHProducer(), skipInstrument: true, expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "exponential histogram constructor error via producer (count mismatch)", producer: makeBadEHCountProducer(), skipInstrument: true, expectGatherErrContains: "count", + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "sum constructor error via duplicate label name", @@ -1983,6 +2055,20 @@ func TestEscapingErrorHandling(t *testing.T) { return nil }, expectGatherErrContains: "duplicate label", + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "gauge constructor error via duplicate label name", @@ -1995,6 +2081,20 @@ func TestEscapingErrorHandling(t *testing.T) { return nil }, expectGatherErrContains: "duplicate label", + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "histogram constructor error via duplicate label name", @@ -2007,6 +2107,20 @@ func TestEscapingErrorHandling(t *testing.T) { return nil }, expectGatherErrContains: "duplicate label", + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "bad gauge label name", @@ -2019,6 +2133,20 @@ func TestEscapingErrorHandling(t *testing.T) { return nil }, expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, { name: "bad histogram label name", @@ -2031,6 +2159,20 @@ func TestEscapingErrorHandling(t *testing.T) { return nil }, expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { + require.NotEmpty(t, mfs) + other := 0 + seenTarget := false + for _, mf := range mfs { + if mf.GetName() == "target_info" { + seenTarget = true + continue + } + other++ + } + require.True(t, seenTarget) + require.Equal(t, 0, other) + }, }, } From 87ff8d001830cb1f90f29cf06d1c765c09162d06 Mon Sep 17 00:00:00 2001 From: Robert Wu Date: Thu, 25 Sep 2025 17:49:38 -0400 Subject: [PATCH 3/5] fix unit tests and address PR comment --- exporters/prometheus/errors.go | 9 ++++----- exporters/prometheus/exporter.go | 6 +++--- exporters/prometheus/exporter_test.go | 24 +++++++++++++++--------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/exporters/prometheus/errors.go b/exporters/prometheus/errors.go index 3bd3354d07d..3c8a5b7cac8 100644 --- a/exporters/prometheus/errors.go +++ b/exporters/prometheus/errors.go @@ -5,10 +5,9 @@ package prometheus // import "go.opentelemetry.io/otel/exporters/prometheus" import "errors" -// Internal sentinel errors for consistent error checks in tests. -// These are unexported to avoid growing public API surface. +// Sentinel errors for consistent error checks in tests. var ( - errInvalidMetricType = errors.New("invalid metric type") - errInvalidMetric = errors.New("invalid metric") - errEHScaleBelowMin = errors.New("exponential histogram scale below minimum supported") + ErrInvalidMetricType = errors.New("invalid metric type") + ErrInvalidMetric = errors.New("invalid metric") + ErrEHScaleBelowMin = errors.New("exponential histogram scale below minimum supported") ) diff --git a/exporters/prometheus/exporter.go b/exporters/prometheus/exporter.go index d73786b83ed..10362a3278e 100644 --- a/exporters/prometheus/exporter.go +++ b/exporters/prometheus/exporter.go @@ -258,7 +258,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { for k, m := range scopeMetrics.Metrics { typ := c.metricType(m) if typ == nil { - reportError(ch, nil, errInvalidMetricType) + reportError(ch, nil, ErrInvalidMetricType) continue } name, e := c.getName(m) @@ -270,7 +270,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { drop, help := c.validateMetrics(name, m.Description, typ) if drop { - reportError(ch, nil, errInvalidMetric) + reportError(ch, nil, ErrInvalidMetric) continue } @@ -389,7 +389,7 @@ func addExponentialHistogramMetric[N int64 | float64]( reportError( ch, desc, - fmt.Errorf("%w: %d (min -4)", errEHScaleBelowMin, scale), + fmt.Errorf("%w: %d (min -4)", ErrEHScaleBelowMin, scale), ) err = errors.Join(err, e) continue diff --git a/exporters/prometheus/exporter_test.go b/exporters/prometheus/exporter_test.go index e7b20a44f53..3dea60c5442 100644 --- a/exporters/prometheus/exporter_test.go +++ b/exporters/prometheus/exporter_test.go @@ -1379,7 +1379,7 @@ func TestExponentialHistogramScaleValidation(t *testing.T) { var dtoMetric dto.Metric werr := pm.Write(&dtoMetric) require.Error(t, werr) - assert.ErrorIs(t, werr, errEHScaleBelowMin) + assert.ErrorIs(t, werr, ErrEHScaleBelowMin) // The exporter reports via invalid metric, not the global otel error handler. assert.NoError(t, capturedError) }) @@ -1886,6 +1886,7 @@ func TestEscapingErrorHandling(t *testing.T) { expectNewErr string expectMetricErr string expectGatherErrContains string + expectGatherErrIs error checkMetricFamilies func(t testing.TB, dtos []*dto.MetricFamily) }{ { @@ -1963,7 +1964,7 @@ func TestEscapingErrorHandling(t *testing.T) { }, // With improved error handling, invalid scope label names result in an invalid metric // and Gather returns an error containing the normalization failure. - expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "_"`, checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { // target_info should still be exported; metric with bad scope label dropped. require.NotEmpty(t, mfs) @@ -1992,14 +1993,14 @@ func TestEscapingErrorHandling(t *testing.T) { name: "bad translated label name", counterName: "foo", labelName: "$%^&", - expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "_"`, }, { name: "unsupported data type via producer", // Use a producer to emit a Summary data point; no SDK instruments. - producer: makeSummaryProducer(), - skipInstrument: true, - expectGatherErrContains: "invalid metric type", + producer: makeSummaryProducer(), + skipInstrument: true, + expectGatherErrIs: ErrInvalidMetricType, checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { require.NotEmpty(t, mfs) other := 0 @@ -2019,7 +2020,7 @@ func TestEscapingErrorHandling(t *testing.T) { name: "bad exponential histogram label name via producer", producer: makeBadEHProducer(), skipInstrument: true, - expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "_"`, checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { require.NotEmpty(t, mfs) other := 0 @@ -2144,7 +2145,7 @@ func TestEscapingErrorHandling(t *testing.T) { g.Record(ctx, 1, otelmetric.WithAttributes(attribute.Key("$%^&").String("B"))) return nil }, - expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "_"`, checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { require.NotEmpty(t, mfs) other := 0 @@ -2170,7 +2171,7 @@ func TestEscapingErrorHandling(t *testing.T) { h.Record(ctx, 1.23, otelmetric.WithAttributes(attribute.Key("$%^&").String("B"))) return nil }, - expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "____"`, + expectGatherErrContains: `normalization for label name "$%^&" resulted in invalid name "_"`, checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { require.NotEmpty(t, mfs) other := 0 @@ -2260,6 +2261,11 @@ func TestEscapingErrorHandling(t *testing.T) { require.Contains(t, err.Error(), tc.expectGatherErrContains) return } + if tc.expectGatherErrIs != nil { + require.Error(t, err) + require.ErrorIs(t, err, tc.expectGatherErrIs) + return + } require.NoError(t, err) if tc.checkMetricFamilies != nil { tc.checkMetricFamilies(t, got) From c6c6ad32229a8a2f08797b5bc13d75d38271ec79 Mon Sep 17 00:00:00 2001 From: Robert Wu Date: Thu, 25 Sep 2025 20:28:31 -0400 Subject: [PATCH 4/5] address PR comments --- CHANGELOG.md | 2 +- exporters/prometheus/exporter_test.go | 40 +++++++++++++++++++++++++-- exporters/prometheus/go.sum | 2 ++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5878b1fa0f..d63207844a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/meter` synchronously de-duplicates the passed attributes instead of delegating it to the returned `MeterOption`. (#7266) - `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/log` synchronously de-duplicates the passed attributes instead of delegating it to the returned `LoggerOption`. (#7266) - `Distinct` in `go.opentelemetry.io/otel/attribute` is no longer guaranteed to uniquely identify an attribute set. Collisions between `Distinct` values for different Sets are possible with extremely high cardinality (billions of series per instrument), but are highly unlikely. (#7175) -- Improve error handling of dropping data during translation by using `NewInvalidMetric` in `go.opentelemetry.io/otel/exporters/prometheus`. (#7363) +- Improve error handling for dropped data during translation by using `prometheus.NewInvalidMetric` in `go.opentelemetry.io/otel/exporters/prometheus`. **Breaking Change:** Previously, these cases were only logged and scrapes succeeded. Now, when translation would drop data (e.g., invalid label/value), the exporter emits a `NewInvalidMetric`, and Prometheus scrapes **fail with HTTP 500** by default. To preserve the prior behavior (scrapes succeed while errors are logged), configure your Prometheus HTTP handler with: `promhttp.HandlerOpts{ ErrorHandling: promhttp.ContinueOnError }`. (#7363) diff --git a/exporters/prometheus/exporter_test.go b/exporters/prometheus/exporter_test.go index 3dea60c5442..82d8d1012e4 100644 --- a/exporters/prometheus/exporter_test.go +++ b/exporters/prometheus/exporter_test.go @@ -8,12 +8,15 @@ import ( "errors" "fmt" "math" + "net/http" + "net/http/httptest" "os" "sync" "testing" "time" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/testutil" dto "github.com/prometheus/client_model/go" "github.com/prometheus/otlptranslator" @@ -37,6 +40,22 @@ type producerFunc func(context.Context) ([]metricdata.ScopeMetrics, error) func (f producerFunc) Produce(ctx context.Context) ([]metricdata.ScopeMetrics, error) { return f(ctx) } +// Helper: scrape with ContinueOnError and return body + status. +func scrapeWithContinueOnError(reg *prometheus.Registry) (int, string) { + h := promhttp.HandlerFor( + reg, + promhttp.HandlerOpts{ + ErrorHandling: promhttp.ContinueOnError, + }, + ) + + rr := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody) + h.ServeHTTP(rr, req) + + return rr.Code, rr.Body.String() +} + func TestPrometheusExporter(t *testing.T) { testCases := []struct { name string @@ -1021,6 +1040,23 @@ func TestDuplicateMetrics(t *testing.T) { // Gathering should surface an error instead of silently dropping. _, err := registry.Gather() require.Error(t, err) + + // 2) Also assert what users will see if they opt into ContinueOnError. + // Compare the HTTP body to an expected file that contains only the valid series + // (e.g., "target_info" and any non-conflicting families). + status, body := scrapeWithContinueOnError(registry) + require.Equal(t, http.StatusOK, status) + + matched := false + for _, filename := range tc.possibleExpectedFiles { + want, ferr := os.ReadFile(filename) + require.NoError(t, ferr) + if body == string(want) { + matched = true + break + } + } + require.Truef(t, matched, "expected export not produced under ContinueOnError; got:\n%s", body) } else { match := false for _, filename := range tc.possibleExpectedFiles { @@ -1378,8 +1414,7 @@ func TestExponentialHistogramScaleValidation(t *testing.T) { } var dtoMetric dto.Metric werr := pm.Write(&dtoMetric) - require.Error(t, werr) - assert.ErrorIs(t, werr, ErrEHScaleBelowMin) + require.ErrorIs(t, werr, ErrEHScaleBelowMin) // The exporter reports via invalid metric, not the global otel error handler. assert.NoError(t, capturedError) }) @@ -2262,7 +2297,6 @@ func TestEscapingErrorHandling(t *testing.T) { return } if tc.expectGatherErrIs != nil { - require.Error(t, err) require.ErrorIs(t, err, tc.expectGatherErrIs) return } diff --git a/exporters/prometheus/go.sum b/exporters/prometheus/go.sum index c2cead281b8..6137bb6472f 100644 --- a/exporters/prometheus/go.sum +++ b/exporters/prometheus/go.sum @@ -13,6 +13,8 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= From 63ae765be2af394fa0d1319820ccc2780ea36f2e Mon Sep 17 00:00:00 2001 From: Robert Wu Date: Thu, 25 Sep 2025 20:44:37 -0400 Subject: [PATCH 5/5] update error variables --- exporters/prometheus/errors.go | 6 +++--- exporters/prometheus/exporter.go | 6 +++--- exporters/prometheus/exporter_test.go | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/exporters/prometheus/errors.go b/exporters/prometheus/errors.go index 3c8a5b7cac8..f2076de761e 100644 --- a/exporters/prometheus/errors.go +++ b/exporters/prometheus/errors.go @@ -7,7 +7,7 @@ import "errors" // Sentinel errors for consistent error checks in tests. var ( - ErrInvalidMetricType = errors.New("invalid metric type") - ErrInvalidMetric = errors.New("invalid metric") - ErrEHScaleBelowMin = errors.New("exponential histogram scale below minimum supported") + errInvalidMetricType = errors.New("invalid metric type") + errInvalidMetric = errors.New("invalid metric") + errEHScaleBelowMin = errors.New("exponential histogram scale below minimum supported") ) diff --git a/exporters/prometheus/exporter.go b/exporters/prometheus/exporter.go index 10362a3278e..d73786b83ed 100644 --- a/exporters/prometheus/exporter.go +++ b/exporters/prometheus/exporter.go @@ -258,7 +258,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { for k, m := range scopeMetrics.Metrics { typ := c.metricType(m) if typ == nil { - reportError(ch, nil, ErrInvalidMetricType) + reportError(ch, nil, errInvalidMetricType) continue } name, e := c.getName(m) @@ -270,7 +270,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) { drop, help := c.validateMetrics(name, m.Description, typ) if drop { - reportError(ch, nil, ErrInvalidMetric) + reportError(ch, nil, errInvalidMetric) continue } @@ -389,7 +389,7 @@ func addExponentialHistogramMetric[N int64 | float64]( reportError( ch, desc, - fmt.Errorf("%w: %d (min -4)", ErrEHScaleBelowMin, scale), + fmt.Errorf("%w: %d (min -4)", errEHScaleBelowMin, scale), ) err = errors.Join(err, e) continue diff --git a/exporters/prometheus/exporter_test.go b/exporters/prometheus/exporter_test.go index 82d8d1012e4..d087b272a39 100644 --- a/exporters/prometheus/exporter_test.go +++ b/exporters/prometheus/exporter_test.go @@ -1414,7 +1414,7 @@ func TestExponentialHistogramScaleValidation(t *testing.T) { } var dtoMetric dto.Metric werr := pm.Write(&dtoMetric) - require.ErrorIs(t, werr, ErrEHScaleBelowMin) + require.ErrorIs(t, werr, errEHScaleBelowMin) // The exporter reports via invalid metric, not the global otel error handler. assert.NoError(t, capturedError) }) @@ -2035,7 +2035,7 @@ func TestEscapingErrorHandling(t *testing.T) { // Use a producer to emit a Summary data point; no SDK instruments. producer: makeSummaryProducer(), skipInstrument: true, - expectGatherErrIs: ErrInvalidMetricType, + expectGatherErrIs: errInvalidMetricType, checkMetricFamilies: func(t testing.TB, mfs []*dto.MetricFamily) { require.NotEmpty(t, mfs) other := 0