diff --git a/cmd/generator/README.md b/cmd/generator/README.md new file mode 100644 index 0000000000000..6cf0bab8684de --- /dev/null +++ b/cmd/generator/README.md @@ -0,0 +1,147 @@ +# Histogram Generator + +A Go package for generating realistic histogram data for OpenTelemetry metrics testing. This package provides statistical distribution functions and can optionally publish generated metrics to OTLP endpoints. + +## Architecture + +The package is organized into focused modules for maintainability and scalability: + +``` +cmd/generator/ +├── generator.go # Package documentation and overview +├── types.go # Core data structures and types +├── histogram_generator.go # Main histogram generation logic +├── distributions.go # Statistical distribution functions +├── otlp_publisher.go # OTLP endpoint publishing functionality +└── example_test.go # Usage examples +``` + +### Key Components + +- **HistogramGenerator**: Main generator that creates histogram data from statistical distributions +- **OTLPPublisher**: Handles publishing metrics to OpenTelemetry Protocol endpoints using both telemetrygen and custom OTLP +- **Distribution Functions**: Various statistical distributions (Normal, Exponential, Gamma, etc.) +- **Types**: Shared data structures for histogram inputs, outputs, and configuration + +### Publishing Approach + +The package uses the official OpenTelemetry telemetrygen package for all metric publishing, ensuring: + +- **Standard Compliance**: Full compatibility with OTLP endpoints +- **Reliability**: Uses the same code as the official telemetrygen tool +- **Simplicity**: Clean API with `metrics.Start(cfg)` under the hood +- **Flexibility**: Supports Gauge, Sum, and Histogram metric types + +The histogram generator creates realistic data distributions, while telemetrygen handles the actual OTLP publishing. 
+ +## Usage + +### Basic Generation + +```go +generator := NewHistogramGenerator(GenerationOptions{ + Seed: 12345, // For reproducible results +}) + +input := HistogramInput{ + Count: 1000, + Min: ptr(10.0), + Max: ptr(200.0), + Boundaries: []float64{25, 50, 75, 100, 150}, + Attributes: map[string]string{"service.name": "test-service"}, +} + +result, err := generator.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return NormalRandom(rnd, 75, 25) // mean=75, stddev=25 +}) +``` + +### Generation with Publishing + +```go +generator := NewHistogramGenerator(GenerationOptions{ + Seed: time.Now().UnixNano(), + Endpoint: "localhost:4318", // OTLP HTTP endpoint +}) + +result, err := generator.GenerateAndPublishHistograms(input, valueFunc) +``` + +### Direct Publishing with Telemetrygen + +You can also use telemetrygen directly for different metric types: + +```go +publisher := NewOTLPPublisher("localhost:4318") + +// Send different types of metrics using telemetrygen +err := publisher.SendSumMetric("requests_total", 100) +err = publisher.SendGaugeMetric("cpu_usage", 75.5) +err = publisher.SendHistogramMetricSimple("response_time") +``` + +This approach uses the official OpenTelemetry telemetrygen package under the hood, ensuring compatibility with standard OTLP endpoints. 
+ +## Available Distributions + +- **NormalRandom**: Normal (Gaussian) distribution +- **ExponentialRandom**: Exponential distribution +- **GammaRandom**: Gamma distribution +- **LogNormalRandom**: Log-normal distribution +- **WeibullRandom**: Weibull distribution +- **BetaRandom**: Beta distribution + +### Time-based Functions + +- **SinusoidalValue**: Sinusoidal patterns with noise +- **SpikyValue**: Baseline with occasional spikes +- **TrendingValue**: Linear trend with noise + +## Design Principles + +### Single Responsibility +Each file has a focused purpose: +- `types.go`: Data structures only +- `distributions.go`: Statistical functions only +- `histogram_generator.go`: Core generation logic only +- `otlp_publisher.go`: Publishing logic only + +### Dependency Injection +The generator accepts value functions, allowing for flexible distribution selection and custom patterns. + +### Testability +All components are designed for easy unit testing with deterministic seeds and dependency injection. + +### Extensibility +New distributions can be added to `distributions.go` without affecting other components. + +## Features + +- ✅ **Statistical Distributions**: Multiple distribution functions for realistic data +- ✅ **OTLP Publishing**: Direct integration with OpenTelemetry Protocol endpoints +- ✅ **Flexible Generation**: Custom value functions and deterministic seeds +- ✅ **Multiple Metric Types**: Support for Gauge, Sum, and Histogram metrics + +## Future Enhancements + +1. **Additional Publishers**: Support for Prometheus, StatsD, etc. +2. **More Distributions**: Poisson, Binomial, etc. +3. **Validation**: Input validation for histogram consistency +4. **Batch Generation**: Generate multiple histograms efficiently +5. **Configuration Files**: YAML/JSON configuration support + +## Testing + +The package includes comprehensive examples in `example_test.go` and integrates with the test cases in `share/testdata/histograms/`. 
+ +Run tests: +```bash +go test ./cmd/generator/... +``` + +## Integration + +This generator is used by: +- `share/testdata/histograms/histograms.go`: Test case generation +- Various metric exporters for testing realistic data patterns +- Performance testing tools for load generation \ No newline at end of file diff --git a/cmd/generator/distributions.go b/cmd/generator/distributions.go new file mode 100644 index 0000000000000..55144d10f3979 --- /dev/null +++ b/cmd/generator/distributions.go @@ -0,0 +1,92 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package generator + +import ( + "math" + "math/rand" + "time" +) + +// Distribution functions for generating statistical data + +func ExponentialRandom(rnd *rand.Rand, rate float64) float64 { + return -math.Log(1.0-rnd.Float64()) / rate +} + +func NormalRandom(rnd *rand.Rand, mean, stddev float64) float64 { + return rnd.NormFloat64()*stddev + mean +} + +func LogNormalRandom(rnd *rand.Rand, mu, sigma float64) float64 { + return math.Exp(NormalRandom(rnd, mu, sigma)) +} + +func WeibullRandom(rnd *rand.Rand, shape, scale float64) float64 { + return scale * math.Pow(-math.Log(1.0-rnd.Float64()), 1.0/shape) +} + +func BetaRandom(rnd *rand.Rand, alpha, beta float64) float64 { + x := GammaRandom(rnd, alpha, 1.0) + y := GammaRandom(rnd, beta, 1.0) + return x / (x + y) +} + +func GammaRandom(rnd *rand.Rand, alpha, beta float64) float64 { + if alpha < 1.0 { + // Use Johnk's generator for alpha < 1 + for { + u := rnd.Float64() + v := rnd.Float64() + x := math.Pow(u, 1.0/alpha) + y := math.Pow(v, 1.0/(1.0-alpha)) + if x+y <= 1.0 { + if x+y > 0 { + return beta * x / (x + y) * (-math.Log(rnd.Float64())) + } + } + } + } + + // Marsaglia and Tsang's method for alpha >= 1 + d := alpha - 1.0/3.0 + c := 1.0 / math.Sqrt(9.0*d) + + for { + x := rnd.NormFloat64() + v := 1.0 + c*x + if v <= 0 { + continue + } + v = v * v * v + u := rnd.Float64() + if u < 1.0-0.0331*(x*x)*(x*x) { + return beta * d * v + 
} + if math.Log(u) < 0.5*x*x+d*(1.0-v+math.Log(v)) { + return beta * d * v + } + } +} + +// Time-based value functions + +func SinusoidalValue(rnd *rand.Rand, timestamp time.Time, amplitude, period, phase, baseline float64) float64 { + t := float64(timestamp.Unix()) + noise := rnd.NormFloat64() * amplitude * 0.1 // 10% noise + return baseline + amplitude*math.Sin(2*math.Pi*t/period+phase) + noise +} + +func SpikyValue(rnd *rand.Rand, baseline, spikeHeight, spikeProb float64) float64 { + if rnd.Float64() < spikeProb { + return baseline + spikeHeight*rnd.Float64() + } + return baseline + rnd.NormFloat64()*baseline*0.1 +} + +func TrendingValue(rnd *rand.Rand, timestamp time.Time, startValue, trendRate, noise float64) float64 { + t := float64(timestamp.Unix()) + trend := startValue + trendRate*t + return trend + rnd.NormFloat64()*noise +} diff --git a/cmd/generator/example_test.go b/cmd/generator/example_test.go new file mode 100644 index 0000000000000..47064a3656f19 --- /dev/null +++ b/cmd/generator/example_test.go @@ -0,0 +1,632 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package generator_test + +import ( + "fmt" + "math/rand" + "testing" + "time" + + "github.com/amazon-contributing/opentelemetry-collector-contrib/cmd/generator" +) + +// TestHistogramGenerator_GenerateHistogram_Example demonstrates basic histogram generation +// CURRENT CAPABILITY: Statistical histogram data generation with custom distributions +func TestHistogramGenerator_GenerateHistogram_Example(t *testing.T) { + fmt.Println("=== CURRENT GOAL: Statistical Histogram Data Generation ===") + + // Create a generator with a fixed seed for reproducible results + gen := generator.NewHistogramGenerator(generator.GenerationOptions{ + Seed: 12345, + }) + + // Define histogram input parameters + input := generator.HistogramInput{ + Count: 1000, + Min: ptr(10.0), + Max: ptr(200.0), + Boundaries: []float64{25, 50, 75, 100, 150}, + Attributes: map[string]string{ + 
"service.name": "payment-service", + "environment": "production", + }, + } + + // Generate histogram using normal distribution + result, err := gen.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 25) // mean=75, stddev=25 + }) + + if err != nil { + panic(err) + } + + fmt.Printf("✅ Generated %d samples with sum=%.2f, avg=%.2f\n", + result.Expected.Count, result.Expected.Sum, result.Expected.Average) + fmt.Printf("✅ Min=%.2f, Max=%.2f\n", *result.Expected.Min, *result.Expected.Max) + fmt.Printf("✅ Bucket distribution: %v\n", result.Input.Counts) + + // This example demonstrates histogram generation with statistical distributions. + // Output will vary due to randomness, but structure is consistent. +} + +// TestHieHistogrenerator_GenerateAndPublishHistograms_Example shows telemetrygen integration +// CURRENT LIMITATION: Only supports basic histogram publishing, not custom bucket data +func TestHistogramGenerator_GenerateAndPublishHistograms_Example(t *testing.T) { + // Make sure collector is running before removing the skip on this function + t.Skip() + fmt.Println("=== CURRENT GOAL: Basic OTLP Publishing via Telemetrygen ===") + + // Create a generator with OTLP endpoint + gen := generator.NewHistogramGenerator(generator.GenerationOptions{ + Seed: time.Now().UnixNano(), + Endpoint: "localhost:4318", // OTLP HTTP endpoint + }) + + input := generator.HistogramInput{ + Count: 500, + Boundaries: []float64{10, 50, 100, 500, 1000}, + Attributes: map[string]string{ + "service.name": "web-service", + "service.version": "1.0.0", + "environment": "staging", + }, + } + + // Generate and publish using exponential distribution + result, err := gen.GenerateAndPublishHistograms(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.ExponentialRandom(rnd, 0.01) // rate=0.01 + }) + + if err != nil { + fmt.Printf("❌ Error (expected - telemetrygen limitations): %v\n", err) + return + } + + fmt.Printf("✅ 
Generated and published histogram with %d samples\n", result.Expected.Count) + fmt.Printf("⚠️ Note: Uses telemetrygen's built-in histogram generation, not custom buckets\n") +} + +// TestOTLPPublisher_SendSumMetric_Example demonstrates current telemetrygen integration +// CURRENT CAPABILITY: Basic Sum, Gauge, Histogram via telemetrygen +// MISSING: Delta/Cumulative distinction, Summary, Exponential Histogram +func TestOTLPPublisher_SendSumMetric_Example(t *testing.T) { + // Make sure collector is running before removing the skip on this function + t.Skip() + fmt.Println("=== CURRENT GOAL: Basic Metric Types via Telemetrygen ===") + + publisher := generator.NewOTLPPublisher("localhost:4318") + + // ✅ WORKING: Basic metric types supported by telemetrygen + err := publisher.SendSumMetric("requests_total", 100) + if err != nil { + fmt.Printf("❌ Error sending sum metric: %v\n", err) + return + } + fmt.Println("✅ Sum metric sent (telemetrygen)") + + err = publisher.SendGaugeMetric("cpu_usage", 75.5) + if err != nil { + fmt.Printf("❌ Error sending gauge metric: %v\n", err) + return + } + fmt.Println("✅ Gauge metric sent (telemetrygen)") + + err = publisher.SendHistogramMetricSimple("response_time") + if err != nil { + fmt.Printf("❌ Error sending histogram metric: %v\n", err) + return + } + fmt.Println("✅ Basic histogram sent (telemetrygen)") + + fmt.Println("\n⚠️ LIMITATIONS:") + fmt.Println(" - No delta vs cumulative temporality control") + fmt.Println(" - No summary metrics") + fmt.Println(" - No exponential histograms") + fmt.Println(" - No custom histogram bucket data") +} + +// TestAllDistributions_Example demonstrates all available statistical distributions +// CURRENT CAPABILITY: Complete statistical distribution library +func TestAllDistributions_Example(t *testing.T) { + fmt.Println("=== CURRENT CAPABILITY: All Statistical Distributions ===") + + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 42}) + boundaries := []float64{10, 25, 50, 
75, 100, 150, 200} + attributes := map[string]string{"test": "distributions"} + + fmt.Println("\n📊 PROBABILITY DISTRIBUTIONS:") + + // Normal Distribution + result, _ := gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 20) // mean=75, stddev=20 + }) + fmt.Printf("✅ Normal (μ=75, σ=20): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Exponential Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.ExponentialRandom(rnd, 0.02) // rate=0.02 + }) + fmt.Printf("✅ Exponential (λ=0.02): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Log-Normal Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.LogNormalRandom(rnd, 3.5, 0.8) // mu=3.5, sigma=0.8 + }) + fmt.Printf("✅ Log-Normal (μ=3.5, σ=0.8): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Gamma Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.GammaRandom(rnd, 2.0, 25.0) // shape=2, scale=25 + }) + fmt.Printf("✅ Gamma (α=2, β=25): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Weibull Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.WeibullRandom(rnd, 2.5, 80) // shape=2.5, scale=80 + }) + fmt.Printf("✅ Weibull (k=2.5, λ=80): 
avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Beta Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.BetaRandom(rnd, 2, 5) * 200 // scale to 0-200 range + }) + fmt.Printf("✅ Beta (α=2, β=5) scaled: avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) +} + +// TestTimeBasedPatterns_Example demonstrates time-based value generation +// CURRENT CAPABILITY: Dynamic time-based patterns for realistic metrics +func TestTimeBasedPatterns_Example(t *testing.T) { + fmt.Println("\n=== CURRENT CAPABILITY: Time-Based Patterns ===") + + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 123}) + boundaries := []float64{20, 40, 60, 80, 100, 120} + attributes := map[string]string{"pattern": "time-based"} + + fmt.Println("\n🕐 TIME-BASED FUNCTIONS:") + + // Sinusoidal Pattern (daily cycle) + result, _ := gen.GenerateHistogram(generator.HistogramInput{ + Count: 500, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.SinusoidalValue(rnd, t, 30, 86400, 0, 60) // 24-hour cycle + }) + fmt.Printf("✅ Sinusoidal (24h cycle): avg=%.1f, range=[%.1f-%.1f]\n", + result.Expected.Average, *result.Expected.Min, *result.Expected.Max) + + // Spiky Pattern (occasional bursts) + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 500, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.SpikyValue(rnd, 50, 100, 0.1) // 10% spike probability + }) + fmt.Printf("✅ Spiky (10%% spikes): avg=%.1f, range=[%.1f-%.1f]\n", + result.Expected.Average, *result.Expected.Min, *result.Expected.Max) + + // Trending Pattern (gradual increase) + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 500, Boundaries: boundaries, 
Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.TrendingValue(rnd, t, 40, 0.0001, 8) // slow upward trend + }) + fmt.Printf("✅ Trending (upward): avg=%.1f, range=[%.1f-%.1f]\n", + result.Expected.Average, *result.Expected.Min, *result.Expected.Max) +} + +// TestAdvancedHistogramFeatures_Example shows advanced histogram generation capabilities +// CURRENT CAPABILITY: Custom buckets, attributes, statistical validation +func TestAdvancedHistogramFeatures_Example(t *testing.T) { + fmt.Println("\n=== CURRENT CAPABILITY: Advanced Histogram Features ===") + + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 999}) + + fmt.Println("\n🔧 ADVANCED FEATURES:") + + // Custom bucket boundaries for different use cases + latencyBoundaries := []float64{1, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000} + sizeBoundaries := []float64{1024, 4096, 16384, 65536, 262144, 1048576, 4194304} + + // Latency histogram with log-normal distribution + result, _ := gen.GenerateHistogram(generator.HistogramInput{ + Count: 2000, + Min: ptr(0.5), + Max: ptr(10000.0), + Boundaries: latencyBoundaries, + Attributes: map[string]string{ + "service.name": "api-gateway", + "endpoint": "/users", + "method": "GET", + "status_code": "200", + }, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.LogNormalRandom(rnd, 3.0, 1.2) // realistic latency distribution + }) + fmt.Printf("✅ Latency histogram: %d samples, avg=%.1fms\n", + result.Expected.Count, result.Expected.Average) + fmt.Printf(" Buckets: %v\n", result.Input.Counts) + + // File size histogram with exponential distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1500, + Boundaries: sizeBoundaries, + Attributes: map[string]string{ + "file_type": "image", + "compression": "jpeg", + "quality": "high", + }, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.ExponentialRandom(rnd, 0.000001) // file size distribution + }) + 
fmt.Printf("✅ File size histogram: %d samples, avg=%.0f bytes\n", + result.Expected.Count, result.Expected.Average) + + // Multi-modal distribution (combining two normals) + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, + Boundaries: []float64{10, 30, 50, 70, 90, 110, 130, 150}, + Attributes: map[string]string{ + "distribution": "bimodal", + "use_case": "response_time", + }, + }, func(rnd *rand.Rand, t time.Time) float64 { + if rnd.Float64() < 0.7 { + return generator.NormalRandom(rnd, 40, 10) // fast responses (70%) + } + return generator.NormalRandom(rnd, 120, 15) // slow responses (30%) + }) + fmt.Printf("✅ Bimodal distribution: avg=%.1f (fast+slow responses)\n", + result.Expected.Average) +} + +// TestCurrentPublishingCapabilities_Example shows what publishing works today +// CURRENT CAPABILITY: Basic OTLP publishing via telemetrygen +func TestCurrentPublishingCapabilities_Example(t *testing.T) { + // Make sure collector is running before removing the skip on this function + t.Skip() + fmt.Println("\n=== CURRENT CAPABILITY: OTLP Publishing ===") + + publisher := generator.NewOTLPPublisher("localhost:4318") + + fmt.Println("\n📡 WORKING METRIC TYPES:") + + // These work with current telemetrygen integration + metrics := []struct { + name string + metricType string + value float64 + status string + }{ + {"http_requests_total", "Sum", 1500, "✅ Counter/Sum"}, + {"cpu_utilization", "Gauge", 67.8, "✅ Gauge"}, + {"response_time_histogram", "Histogram", 0, "✅ Basic Histogram"}, + {"memory_usage_bytes", "Gauge", 2147483648, "✅ Gauge (large values)"}, + {"error_rate", "Gauge", 0.025, "✅ Gauge (fractional)"}, + } + + for _, m := range metrics { + err := publisher.SendMetric(m.name, m.metricType, m.value) + if err != nil { + fmt.Printf("❌ %s: %v\n", m.status, err) + } else { + fmt.Printf("%s: %s (%.2f)\n", m.status, m.name, m.value) + } + } + + fmt.Println("\n⚠️ TELEMETRYGEN LIMITATIONS:") + fmt.Println(" - No custom histogram bucket data") + 
fmt.Println(" - No temporality control (delta vs cumulative)") + fmt.Println(" - No summary metrics") + fmt.Println(" - No exponential histograms") +} + +// TestShowCapabilities demonstrates all current capabilities +func TestShowCapabilities(t *testing.T) { + fmt.Println("\n🎯 RUNNING CAPABILITY DEMONSTRATION") + + // Run all the example functions to show capabilities + TestHistogramGenerator_GenerateHistogram_Example(t) + fmt.Println() + + // Skip publishing test that requires OTLP endpoint + fmt.Println("=== SKIPPING: OTLP Publishing (requires running collector) ===") + fmt.Println() + + TestAllDistributions_Example(t) + fmt.Println() + + TestTimeBasedPatterns_Example(t) + fmt.Println() + + TestAdvancedHistogramFeatures_Example(t) + fmt.Println() + + TestCurrentPublishingCapabilities_Example(t) + fmt.Println() + +} + +// TestHistogramGenerator_GenerateHistogram tests the core histogram generation functionality +func TestHistogramGenerator_GenerateHistogram(t *testing.T) { + gen := generator.NewHistogramGenerator(generator.GenerationOptions{ + Seed: 12345, // Fixed seed for reproducible tests + }) + + input := generator.HistogramInput{ + Count: 1000, + Min: ptr(10.0), + Max: ptr(200.0), + Boundaries: []float64{25, 50, 75, 100, 150}, + Attributes: map[string]string{ + "service.name": "test-service", + }, + } + + result, err := gen.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 25) // mean=75, stddev=25 + }) + + // Test basic functionality + if err != nil { + t.Fatalf("Expected no error, got %v", err) + } + + // Test sample count + if result.Expected.Count != 1000 { + t.Errorf("Expected 1000 samples, got %d", result.Expected.Count) + } + + // Test min/max constraints + if result.Expected.Min == nil || *result.Expected.Min < 10.0 { + t.Errorf("Expected min >= 10.0, got %v", result.Expected.Min) + } + if result.Expected.Max == nil || *result.Expected.Max > 200.0 { + t.Errorf("Expected max <= 200.0, got 
%v", result.Expected.Max) + } + + // Test that average is reasonable for normal distribution (mean=75) + if result.Expected.Average < 60 || result.Expected.Average > 90 { + t.Errorf("Expected average around 75 (60-90), got %.2f", result.Expected.Average) + } + + // Test bucket counts + if len(result.Input.Counts) != len(input.Boundaries)+1 { + t.Errorf("Expected %d buckets, got %d", len(input.Boundaries)+1, len(result.Input.Counts)) + } + + // Test that sum equals count * average (approximately) + expectedSum := float64(result.Expected.Count) * result.Expected.Average + if abs(result.Expected.Sum-expectedSum) > 1.0 { + t.Errorf("Sum/average mismatch: sum=%.2f, count*avg=%.2f", result.Expected.Sum, expectedSum) + } + + // Test attributes are preserved + if result.Input.Attributes["service.name"] != "test-service" { + t.Errorf("Expected service.name=test-service, got %s", result.Input.Attributes["service.name"]) + } +} + +// TestHistogramGenerator_Distributions tests all statistical distributions +func TestHistogramGenerator_Distributions(t *testing.T) { + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 42}) + boundaries := []float64{10, 25, 50, 75, 100} + input := generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, + } + + distributions := []struct { + name string + valueFunc func(*rand.Rand, time.Time) float64 + minAvg float64 + maxAvg float64 + }{ + { + name: "Normal", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 50, 10) + }, + minAvg: 40, maxAvg: 60, + }, + { + name: "Exponential", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.ExponentialRandom(rnd, 0.02) + }, + minAvg: 30, maxAvg: 70, + }, + { + name: "Gamma", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.GammaRandom(rnd, 2.0, 25.0) + }, + minAvg: 40, maxAvg: 60, + }, + } + + for _, dist := range distributions { + t.Run(dist.name, func(t *testing.T) { + result, err := 
gen.GenerateHistogram(input, dist.valueFunc) + + if err != nil { + t.Fatalf("Distribution %s failed: %v", dist.name, err) + } + + if result.Expected.Count != 1000 { + t.Errorf("Distribution %s: expected 1000 samples, got %d", dist.name, result.Expected.Count) + } + + if result.Expected.Average < dist.minAvg || result.Expected.Average > dist.maxAvg { + t.Errorf("Distribution %s: average %.2f outside expected range [%.1f-%.1f]", + dist.name, result.Expected.Average, dist.minAvg, dist.maxAvg) + } + }) + } +} + +// TestOTLPPublisher_Creation tests OTLP publisher creation +func TestOTLPPublisher_Creation(t *testing.T) { + t.Skip() + publisher := generator.NewOTLPPublisher("localhost:4318") + + if publisher == nil { + t.Fatal("Expected publisher to be created, got nil") + } + + // Test that publisher has the expected endpoint (this would require exposing the field or adding a getter) + // For now, just test that it was created successfully +} + +// TestGenerationOptions tests the generation options +func TestGenerationOptions(t *testing.T) { + // Test with seed + gen1 := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 123}) + gen2 := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 123}) + + input := generator.HistogramInput{Count: 100, Boundaries: []float64{50, 100}} + + result1, err1 := gen1.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 10) + }) + + result2, err2 := gen2.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 10) + }) + + if err1 != nil || err2 != nil { + t.Fatalf("Expected no errors, got %v, %v", err1, err2) + } + + // With same seed, results should be identical + if result1.Expected.Sum != result2.Expected.Sum { + t.Errorf("Expected identical sums with same seed, got %.2f vs %.2f", + result1.Expected.Sum, result2.Expected.Sum) + } +} + +// TestTimeBasedPatterns tests time-based value functions +func 
TestTimeBasedPatterns(t *testing.T) { + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 999}) + input := generator.HistogramInput{Count: 100, Boundaries: []float64{25, 50, 75, 100}} + + patterns := []struct { + name string + valueFunc func(*rand.Rand, time.Time) float64 + }{ + { + name: "Sinusoidal", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.SinusoidalValue(rnd, t, 20, 3600, 0, 50) + }, + }, + { + name: "Spiky", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.SpikyValue(rnd, 50, 100, 0.1) + }, + }, + { + name: "Trending", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.TrendingValue(rnd, t, 40, 0.001, 5) + }, + }, + } + + for _, pattern := range patterns { + t.Run(pattern.name, func(t *testing.T) { + result, err := gen.GenerateHistogram(input, pattern.valueFunc) + + if err != nil { + t.Fatalf("Pattern %s failed: %v", pattern.name, err) + } + + if result.Expected.Count != 100 { + t.Errorf("Pattern %s: expected 100 samples, got %d", pattern.name, result.Expected.Count) + } + + // Test that we got reasonable values (not all zeros or infinities) + if result.Expected.Sum <= 0 || result.Expected.Average <= 0 { + t.Errorf("Pattern %s: got unreasonable values sum=%.2f, avg=%.2f", + pattern.name, result.Expected.Sum, result.Expected.Average) + } + }) + } +} + +// TestEdgeCases tests edge cases and error conditions +func TestEdgeCases(t *testing.T) { + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 1}) + + t.Run("ZeroCount", func(t *testing.T) { + input := generator.HistogramInput{Count: 0} + result, err := gen.GenerateHistogram(input, nil) + + if err != nil { + t.Fatalf("Expected no error with zero count, got %v", err) + } + + // Should default to some reasonable count + if result.Expected.Count == 0 { + t.Error("Expected non-zero count when input count is 0") + } + }) + + t.Run("NilValueFunc", func(t *testing.T) { + input := 
generator.HistogramInput{Count: 10} + result, err := gen.GenerateHistogram(input, nil) + + if err != nil { + t.Fatalf("Expected no error with nil value func, got %v", err) + } + + if result.Expected.Count == 0 { + t.Error("Expected samples even with nil value function") + } + }) + + t.Run("EmptyBoundaries", func(t *testing.T) { + input := generator.HistogramInput{Count: 10, Boundaries: []float64{}} + result, err := gen.GenerateHistogram(input, nil) + + if err != nil { + t.Fatalf("Expected no error with empty boundaries, got %v", err) + } + + // Should generate default boundaries + if len(result.Input.Boundaries) == 0 { + t.Error("Expected default boundaries when none provided") + } + }) +} + +// Helper function for absolute value +func abs(x float64) float64 { + if x < 0 { + return -x + } + return x +} + +// Helper function to create float64 pointers +func ptr(f float64) *float64 { + return &f +} diff --git a/cmd/generator/generator.go b/cmd/generator/generator.go new file mode 100644 index 0000000000000..86c6f5bd42480 --- /dev/null +++ b/cmd/generator/generator.go @@ -0,0 +1,33 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +// Package generator provides histogram generation capabilities for OpenTelemetry metrics testing. +// This package is designed to generate realistic histogram data using various statistical distributions +// and can optionally publish the generated metrics to OTLP endpoints. 
+// +// The package is organized into several focused modules: +// - types.go: Core data structures and types +// - histogram_generator.go: Main histogram generation logic +// - distributions.go: Statistical distribution functions +// - otlp_publisher.go: OTLP endpoint publishing functionality +// +// Example usage: +// +// generator := NewHistogramGenerator(GenerationOptions{ +// Seed: 12345, +// Endpoint: "localhost:4318", +// }) +// +// result, err := generator.GenerateAndPublishHistograms( +// HistogramInput{ +// Count: 1000, +// Min: ptr(10.0), +// Max: ptr(200.0), +// Boundaries: []float64{25, 50, 75, 100, 150}, +// Attributes: map[string]string{"service.name": "test-service"}, +// }, +// func(rnd *rand.Rand, t time.Time) float64 { +// return NormalRandom(rnd, 50, 15) +// }, +// ) +package generator diff --git a/cmd/generator/generator/README.md b/cmd/generator/generator/README.md new file mode 100644 index 0000000000000..6cf0bab8684de --- /dev/null +++ b/cmd/generator/generator/README.md @@ -0,0 +1,147 @@ +# Histogram Generator + +A Go package for generating realistic histogram data for OpenTelemetry metrics testing. This package provides statistical distribution functions and can optionally publish generated metrics to OTLP endpoints. 
+ +## Architecture + +The package is organized into focused modules for maintainability and scalability: + +``` +cmd/generator/ +├── generator.go # Package documentation and overview +├── types.go # Core data structures and types +├── histogram_generator.go # Main histogram generation logic +├── distributions.go # Statistical distribution functions +├── otlp_publisher.go # OTLP endpoint publishing functionality +└── example_test.go # Usage examples +``` + +### Key Components + +- **HistogramGenerator**: Main generator that creates histogram data from statistical distributions +- **OTLPPublisher**: Handles publishing metrics to OpenTelemetry Protocol endpoints using both telemetrygen and custom OTLP +- **Distribution Functions**: Various statistical distributions (Normal, Exponential, Gamma, etc.) +- **Types**: Shared data structures for histogram inputs, outputs, and configuration + +### Publishing Approach + +The package uses the official OpenTelemetry telemetrygen package for all metric publishing, ensuring: + +- **Standard Compliance**: Full compatibility with OTLP endpoints +- **Reliability**: Uses the same code as the official telemetrygen tool +- **Simplicity**: Clean API with `metrics.Start(cfg)` under the hood +- **Flexibility**: Supports Gauge, Sum, and Histogram metric types + +The histogram generator creates realistic data distributions, while telemetrygen handles the actual OTLP publishing. 
+ +## Usage + +### Basic Generation + +```go +generator := NewHistogramGenerator(GenerationOptions{ + Seed: 12345, // For reproducible results +}) + +input := HistogramInput{ + Count: 1000, + Min: ptr(10.0), + Max: ptr(200.0), + Boundaries: []float64{25, 50, 75, 100, 150}, + Attributes: map[string]string{"service.name": "test-service"}, +} + +result, err := generator.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return NormalRandom(rnd, 75, 25) // mean=75, stddev=25 +}) +``` + +### Generation with Publishing + +```go +generator := NewHistogramGenerator(GenerationOptions{ + Seed: time.Now().UnixNano(), + Endpoint: "localhost:4318", // OTLP HTTP endpoint +}) + +result, err := generator.GenerateAndPublishHistograms(input, valueFunc) +``` + +### Direct Publishing with Telemetrygen + +You can also use telemetrygen directly for different metric types: + +```go +publisher := NewOTLPPublisher("localhost:4318") + +// Send different types of metrics using telemetrygen +err := publisher.SendSumMetric("requests_total", 100) +err = publisher.SendGaugeMetric("cpu_usage", 75.5) +err = publisher.SendHistogramMetricSimple("response_time") +``` + +This approach uses the official OpenTelemetry telemetrygen package under the hood, ensuring compatibility with standard OTLP endpoints. 
+ +## Available Distributions + +- **NormalRandom**: Normal (Gaussian) distribution +- **ExponentialRandom**: Exponential distribution +- **GammaRandom**: Gamma distribution +- **LogNormalRandom**: Log-normal distribution +- **WeibullRandom**: Weibull distribution +- **BetaRandom**: Beta distribution + +### Time-based Functions + +- **SinusoidalValue**: Sinusoidal patterns with noise +- **SpikyValue**: Baseline with occasional spikes +- **TrendingValue**: Linear trend with noise + +## Design Principles + +### Single Responsibility +Each file has a focused purpose: +- `types.go`: Data structures only +- `distributions.go`: Statistical functions only +- `histogram_generator.go`: Core generation logic only +- `otlp_publisher.go`: Publishing logic only + +### Dependency Injection +The generator accepts value functions, allowing for flexible distribution selection and custom patterns. + +### Testability +All components are designed for easy unit testing with deterministic seeds and dependency injection. + +### Extensibility +New distributions can be added to `distributions.go` without affecting other components. + +## Features + +- ✅ **Statistical Distributions**: Multiple distribution functions for realistic data +- ✅ **OTLP Publishing**: Direct integration with OpenTelemetry Protocol endpoints +- ✅ **Flexible Generation**: Custom value functions and deterministic seeds +- ✅ **Multiple Metric Types**: Support for Gauge, Sum, and Histogram metrics + +## Future Enhancements + +1. **Additional Publishers**: Support for Prometheus, StatsD, etc. +2. **More Distributions**: Poisson, Binomial, etc. +3. **Validation**: Input validation for histogram consistency +4. **Batch Generation**: Generate multiple histograms efficiently +5. **Configuration Files**: YAML/JSON configuration support + +## Testing + +The package includes comprehensive examples in `example_test.go` and integrates with the test cases in `share/testdata/histograms/`. 
+ +Run tests: +```bash +go test ./cmd/generator/... +``` + +## Integration + +This generator is used by: +- `share/testdata/histograms/histograms.go`: Test case generation +- Various metric exporters for testing realistic data patterns +- Performance testing tools for load generation \ No newline at end of file diff --git a/cmd/generator/generator/distributions.go b/cmd/generator/generator/distributions.go new file mode 100644 index 0000000000000..55144d10f3979 --- /dev/null +++ b/cmd/generator/generator/distributions.go @@ -0,0 +1,92 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package generator + +import ( + "math" + "math/rand" + "time" +) + +// Distribution functions for generating statistical data + +func ExponentialRandom(rnd *rand.Rand, rate float64) float64 { + return -math.Log(1.0-rnd.Float64()) / rate +} + +func NormalRandom(rnd *rand.Rand, mean, stddev float64) float64 { + return rnd.NormFloat64()*stddev + mean +} + +func LogNormalRandom(rnd *rand.Rand, mu, sigma float64) float64 { + return math.Exp(NormalRandom(rnd, mu, sigma)) +} + +func WeibullRandom(rnd *rand.Rand, shape, scale float64) float64 { + return scale * math.Pow(-math.Log(1.0-rnd.Float64()), 1.0/shape) +} + +func BetaRandom(rnd *rand.Rand, alpha, beta float64) float64 { + x := GammaRandom(rnd, alpha, 1.0) + y := GammaRandom(rnd, beta, 1.0) + return x / (x + y) +} + +func GammaRandom(rnd *rand.Rand, alpha, beta float64) float64 { + if alpha < 1.0 { + // Use Johnk's generator for alpha < 1 + for { + u := rnd.Float64() + v := rnd.Float64() + x := math.Pow(u, 1.0/alpha) + y := math.Pow(v, 1.0/(1.0-alpha)) + if x+y <= 1.0 { + if x+y > 0 { + return beta * x / (x + y) * (-math.Log(rnd.Float64())) + } + } + } + } + + // Marsaglia and Tsang's method for alpha >= 1 + d := alpha - 1.0/3.0 + c := 1.0 / math.Sqrt(9.0*d) + + for { + x := rnd.NormFloat64() + v := 1.0 + c*x + if v <= 0 { + continue + } + v = v * v * v + u := rnd.Float64() + if u < 
1.0-0.0331*(x*x)*(x*x) { + return beta * d * v + } + if math.Log(u) < 0.5*x*x+d*(1.0-v+math.Log(v)) { + return beta * d * v + } + } +} + +// Time-based value functions + +func SinusoidalValue(rnd *rand.Rand, timestamp time.Time, amplitude, period, phase, baseline float64) float64 { + t := float64(timestamp.Unix()) + noise := rnd.NormFloat64() * amplitude * 0.1 // 10% noise + return baseline + amplitude*math.Sin(2*math.Pi*t/period+phase) + noise +} + +func SpikyValue(rnd *rand.Rand, baseline, spikeHeight, spikeProb float64) float64 { + if rnd.Float64() < spikeProb { + return baseline + spikeHeight*rnd.Float64() + } + return baseline + rnd.NormFloat64()*baseline*0.1 +} + +func TrendingValue(rnd *rand.Rand, timestamp time.Time, startValue, trendRate, noise float64) float64 { + t := float64(timestamp.Unix()) + trend := startValue + trendRate*t + return trend + rnd.NormFloat64()*noise +} diff --git a/cmd/generator/generator/example_test.go b/cmd/generator/generator/example_test.go new file mode 100644 index 0000000000000..36b58174693bf --- /dev/null +++ b/cmd/generator/generator/example_test.go @@ -0,0 +1,649 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package generator_test + +import ( + "fmt" + "math/rand" + "slices" + "testing" + "time" + + "github.com/amazon-contributing/opentelemetry-collector-contrib/cmd/generator" +) + +// TestHistogramGenerator_GenerateHistogram_Example demonstrates basic histogram generation +// CURRENT CAPABILITY: Statistical histogram data generation with custom distributions +func TestHistogramGenerator_GenerateHistogram_Example(t *testing.T) { + fmt.Println("=== CURRENT GOAL: Statistical Histogram Data Generation ===") + + // Create a generator with a fixed seed for reproducible results + gen := generator.NewHistogramGenerator(generator.GenerationOptions{ + Seed: 12345, + }) + + // Define histogram input parameters + input := generator.HistogramInput{ + Count: 1000, + Min: ptr(10.0), + Max: 
ptr(200.0), + Boundaries: []float64{25, 50, 75, 100, 150}, + Attributes: map[string]string{ + "service.name": "payment-service", + "environment": "production", + }, + } + + // Generate histogram using normal distribution + result, err := gen.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 25) // mean=75, stddev=25 + }) + + if err != nil { + panic(err) + } + + fmt.Printf("✅ Generated %d samples with sum=%.2f, avg=%.2f\n", + result.Expected.Count, result.Expected.Sum, result.Expected.Average) + fmt.Printf("✅ Min=%.2f, Max=%.2f\n", *result.Expected.Min, *result.Expected.Max) + fmt.Printf("✅ Bucket distribution: %v\n", result.Input.Counts) + + // This example demonstrates histogram generation with statistical distributions. + // Output will vary due to randomness, but structure is consistent. +} + +// TestHieHistogrenerator_GenerateAndPublishHistograms_Example shows telemetrygen integration +// CURRENT LIMITATION: Only supports basic histogram publishing, not custom bucket data +func TestHistogramGenerator_GenerateAndPublishHistograms_Example(t *testing.T) { + // Make sure collector is running before removing the skip on this function + t.Skip() + fmt.Println("=== CURRENT GOAL: Basic OTLP Publishing via Telemetrygen ===") + + // Create a generator with OTLP endpoint + gen := generator.NewHistogramGenerator(generator.GenerationOptions{ + Seed: time.Now().UnixNano(), + Endpoint: "localhost:4318", // OTLP HTTP endpoint + }) + + input := generator.HistogramInput{ + Count: 500, + Boundaries: []float64{10, 50, 100, 500, 1000}, + Attributes: map[string]string{ + "service.name": "web-service", + "service.version": "1.0.0", + "environment": "staging", + }, + } + + // Generate and publish using exponential distribution + result, err := gen.GenerateAndPublishHistograms(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.ExponentialRandom(rnd, 0.01) // rate=0.01 + }) + + if err != nil { + fmt.Printf("❌ 
Error (expected - telemetrygen limitations): %v\n", err) + return + } + + fmt.Printf("✅ Generated and published histogram with %d samples\n", result.Expected.Count) + fmt.Printf("⚠️ Note: Uses telemetrygen's built-in histogram generation, not custom buckets\n") +} + +// TestOTLPPublisher_SendSumMetric_Example demonstrates current telemetrygen integration +// CURRENT CAPABILITY: Basic Sum, Gauge, Histogram via telemetrygen +// MISSING: Delta/Cumulative distinction, Summary, Exponential Histogram +func TestOTLPPublisher_SendSumMetric_Example(t *testing.T) { + // Make sure collector is running before removing the skip on this function + t.Skip() + fmt.Println("=== CURRENT GOAL: Basic Metric Types via Telemetrygen ===") + + publisher := generator.NewOTLPPublisher("localhost:4318") + + // ✅ WORKING: Basic metric types supported by telemetrygen + err := publisher.SendSumMetric("requests_total", 100) + if err != nil { + fmt.Printf("❌ Error sending sum metric: %v\n", err) + return + } + fmt.Println("✅ Sum metric sent (telemetrygen)") + + err = publisher.SendGaugeMetric("cpu_usage", 75.5) + if err != nil { + fmt.Printf("❌ Error sending gauge metric: %v\n", err) + return + } + fmt.Println("✅ Gauge metric sent (telemetrygen)") + + err = publisher.SendHistogramMetricSimple("response_time") + if err != nil { + fmt.Printf("❌ Error sending histogram metric: %v\n", err) + return + } + fmt.Println("✅ Basic histogram sent (telemetrygen)") + + fmt.Println("\n⚠️ LIMITATIONS:") + fmt.Println(" - No delta vs cumulative temporality control") + fmt.Println(" - No summary metrics") + fmt.Println(" - No exponential histograms") + fmt.Println(" - No custom histogram bucket data") +} + +// TestAllDistributions_Example demonstrates all available statistical distributions +// CURRENT CAPABILITY: Complete statistical distribution library +func TestAllDistributions_Example(t *testing.T) { + fmt.Println("=== CURRENT CAPABILITY: All Statistical Distributions ===") + + gen := 
generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 42}) + boundaries := []float64{10, 25, 50, 75, 100, 150, 200} + attributes := map[string]string{"test": "distributions"} + + fmt.Println("\n📊 PROBABILITY DISTRIBUTIONS:") + + // Normal Distribution + result, _ := gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 20) // mean=75, stddev=20 + }) + fmt.Printf("✅ Normal (μ=75, σ=20): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Exponential Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.ExponentialRandom(rnd, 0.02) // rate=0.02 + }) + fmt.Printf("✅ Exponential (λ=0.02): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Log-Normal Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.LogNormalRandom(rnd, 3.5, 0.8) // mu=3.5, sigma=0.8 + }) + fmt.Printf("✅ Log-Normal (μ=3.5, σ=0.8): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Gamma Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.GammaRandom(rnd, 2.0, 25.0) // shape=2, scale=25 + }) + fmt.Printf("✅ Gamma (α=2, β=25): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Weibull Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + 
return generator.WeibullRandom(rnd, 2.5, 80) // shape=2.5, scale=80 + }) + fmt.Printf("✅ Weibull (k=2.5, λ=80): avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) + + // Beta Distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.BetaRandom(rnd, 2, 5) * 200 // scale to 0-200 range + }) + fmt.Printf("✅ Beta (α=2, β=5) scaled: avg=%.1f, samples=%d\n", + result.Expected.Average, result.Expected.Count) +} + +// TestTimeBasedPatterns_Example demonstrates time-based value generation +// CURRENT CAPABILITY: Dynamic time-based patterns for realistic metrics +func TestTimeBasedPatterns_Example(t *testing.T) { + fmt.Println("\n=== CURRENT CAPABILITY: Time-Based Patterns ===") + + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 123}) + boundaries := []float64{20, 40, 60, 80, 100, 120} + attributes := map[string]string{"pattern": "time-based"} + + fmt.Println("\n🕐 TIME-BASED FUNCTIONS:") + + // Sinusoidal Pattern (daily cycle) + result, _ := gen.GenerateHistogram(generator.HistogramInput{ + Count: 500, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.SinusoidalValue(rnd, t, 30, 86400, 0, 60) // 24-hour cycle + }) + fmt.Printf("✅ Sinusoidal (24h cycle): avg=%.1f, range=[%.1f-%.1f]\n", + result.Expected.Average, *result.Expected.Min, *result.Expected.Max) + + // Spiky Pattern (occasional bursts) + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 500, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.SpikyValue(rnd, 50, 100, 0.1) // 10% spike probability + }) + fmt.Printf("✅ Spiky (10%% spikes): avg=%.1f, range=[%.1f-%.1f]\n", + result.Expected.Average, *result.Expected.Min, *result.Expected.Max) + + // Trending Pattern (gradual 
increase) + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 500, Boundaries: boundaries, Attributes: attributes, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.TrendingValue(rnd, t, 40, 0.0001, 8) // slow upward trend + }) + fmt.Printf("✅ Trending (upward): avg=%.1f, range=[%.1f-%.1f]\n", + result.Expected.Average, *result.Expected.Min, *result.Expected.Max) +} + +// TestAdvancedHistogramFeatures_Example shows advanced histogram generation capabilities +// CURRENT CAPABILITY: Custom buckets, attributes, statistical validation +func TestAdvancedHistogramFeatures_Example(t *testing.T) { + fmt.Println("\n=== CURRENT CAPABILITY: Advanced Histogram Features ===") + + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 999}) + + fmt.Println("\n🔧 ADVANCED FEATURES:") + + // Custom bucket boundaries for different use cases + latencyBoundaries := []float64{1, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000} + sizeBoundaries := []float64{1024, 4096, 16384, 65536, 262144, 1048576, 4194304} + + // Latency histogram with log-normal distribution + result, _ := gen.GenerateHistogram(generator.HistogramInput{ + Count: 2000, + Min: ptr(0.5), + Max: ptr(10000.0), + Boundaries: latencyBoundaries, + Attributes: map[string]string{ + "service.name": "api-gateway", + "endpoint": "/users", + "method": "GET", + "status_code": "200", + }, + }, func(rnd *rand.Rand, t time.Time) float64 { + return generator.LogNormalRandom(rnd, 3.0, 1.2) // realistic latency distribution + }) + fmt.Printf("✅ Latency histogram: %d samples, avg=%.1fms\n", + result.Expected.Count, result.Expected.Average) + fmt.Printf(" Buckets: %v\n", result.Input.Counts) + + // File size histogram with exponential distribution + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1500, + Boundaries: sizeBoundaries, + Attributes: map[string]string{ + "file_type": "image", + "compression": "jpeg", + "quality": "high", + }, + }, func(rnd *rand.Rand, t 
time.Time) float64 { + return generator.ExponentialRandom(rnd, 0.000001) // file size distribution + }) + fmt.Printf("✅ File size histogram: %d samples, avg=%.0f bytes\n", + result.Expected.Count, result.Expected.Average) + + // Multi-modal distribution (combining two normals) + result, _ = gen.GenerateHistogram(generator.HistogramInput{ + Count: 1000, + Boundaries: []float64{10, 30, 50, 70, 90, 110, 130, 150}, + Attributes: map[string]string{ + "distribution": "bimodal", + "use_case": "response_time", + }, + }, func(rnd *rand.Rand, t time.Time) float64 { + if rnd.Float64() < 0.7 { + return generator.NormalRandom(rnd, 40, 10) // fast responses (70%) + } + return generator.NormalRandom(rnd, 120, 15) // slow responses (30%) + }) + fmt.Printf("✅ Bimodal distribution: avg=%.1f (fast+slow responses)\n", + result.Expected.Average) +} + +// TestCurrentPublishingCapabilities_Example shows what publishing works today +// CURRENT CAPABILITY: Basic OTLP publishing via telemetrygen +func TestCurrentPublishingCapabilities_Example(t *testing.T) { + // Make sure collector is running before removing the skip on this function + t.Skip() + fmt.Println("\n=== CURRENT CAPABILITY: OTLP Publishing ===") + + publisher := generator.NewOTLPPublisher("localhost:4318") + + fmt.Println("\n📡 WORKING METRIC TYPES:") + + // These work with current telemetrygen integration + metrics := []struct { + name string + metricType string + value float64 + status string + }{ + {"http_requests_total", "Sum", 1500, "✅ Counter/Sum"}, + {"cpu_utilization", "Gauge", 67.8, "✅ Gauge"}, + {"response_time_histogram", "Histogram", 0, "✅ Basic Histogram"}, + {"memory_usage_bytes", "Gauge", 2147483648, "✅ Gauge (large values)"}, + {"error_rate", "Gauge", 0.025, "✅ Gauge (fractional)"}, + } + + for _, m := range metrics { + err := publisher.SendMetric(m.name, m.metricType, m.value) + if err != nil { + fmt.Printf("❌ %s: %v\n", m.status, err) + } else { + fmt.Printf("%s: %s (%.2f)\n", m.status, m.name, m.value) + } + } 
+ + fmt.Println("\n⚠️ TELEMETRYGEN LIMITATIONS:") + fmt.Println(" - No custom histogram bucket data") + fmt.Println(" - No temporality control (delta vs cumulative)") + fmt.Println(" - No summary metrics") + fmt.Println(" - No exponential histograms") +} + +// TestShowCapabilities demonstrates all current capabilities +func TestShowCapabilities(t *testing.T) { + fmt.Println("\n🎯 RUNNING CAPABILITY DEMONSTRATION") + + // Run all the example functions to show capabilities + TestHistogramGenerator_GenerateHistogram_Example(t) + fmt.Println() + + // Skip publishing test that requires OTLP endpoint + fmt.Println("=== SKIPPING: OTLP Publishing (requires running collector) ===") + fmt.Println() + + TestAllDistributions_Example(t) + fmt.Println() + + TestTimeBasedPatterns_Example(t) + fmt.Println() + + TestAdvancedHistogramFeatures_Example(t) + fmt.Println() + + TestCurrentPublishingCapabilities_Example(t) + fmt.Println() + +} + +// TestHistogramGenerator_GenerateHistogram tests the core histogram generation functionality +func TestHistogramGenerator_GenerateHistogram(t *testing.T) { + gen := generator.NewHistogramGenerator(generator.GenerationOptions{ + Seed: 12345, // Fixed seed for reproducible tests + }) + + input := generator.HistogramInput{ + Count: 1000, + Min: ptr(10.0), + Max: ptr(200.0), + Boundaries: []float64{25, 50, 75, 100, 150}, + Attributes: map[string]string{ + "service.name": "test-service", + }, + } + + result, err := gen.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 25) // mean=75, stddev=25 + }) + + // Test basic functionality + if err != nil { + t.Fatalf("Expected no error, got %v", err) + } + + // Test sample count + if result.Expected.Count != 1000 { + t.Errorf("Expected 1000 samples, got %d", result.Expected.Count) + } + + // Test min/max constraints + if result.Expected.Min == nil || *result.Expected.Min < 10.0 { + t.Errorf("Expected min >= 10.0, got %v", result.Expected.Min) + } + if 
result.Expected.Max == nil || *result.Expected.Max > 200.0 { + t.Errorf("Expected max <= 200.0, got %v", result.Expected.Max) + } + + // Test that average is reasonable for normal distribution (mean=75) + if result.Expected.Average < 60 || result.Expected.Average > 90 { + t.Errorf("Expected average around 75 (60-90), got %.2f", result.Expected.Average) + } + + // Test bucket counts + if len(result.Input.Counts) != len(input.Boundaries)+1 { + t.Errorf("Expected %d buckets, got %d", len(input.Boundaries)+1, len(result.Input.Counts)) + } + + // Test that sum equals count * average (approximately) + expectedSum := float64(result.Expected.Count) * result.Expected.Average + if abs(result.Expected.Sum-expectedSum) > 1.0 { + t.Errorf("Sum/average mismatch: sum=%.2f, count*avg=%.2f", result.Expected.Sum, expectedSum) + } + + // Test attributes are preserved + if result.Input.Attributes["service.name"] != "test-service" { + t.Errorf("Expected service.name=test-service, got %s", result.Input.Attributes["service.name"]) + } +} + +// TestHistogramGenerator_Distributions tests all statistical distributions +func TestHistogramGenerator_Distributions(t *testing.T) { + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 42}) + boundaries := []float64{10, 25, 50, 75, 100} + input := generator.HistogramInput{ + Count: 1000, Boundaries: boundaries, + } + + distributions := []struct { + name string + valueFunc func(*rand.Rand, time.Time) float64 + minAvg float64 + maxAvg float64 + }{ + { + name: "Normal", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 50, 10) + }, + minAvg: 40, maxAvg: 60, + }, + { + name: "Exponential", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.ExponentialRandom(rnd, 0.02) + }, + minAvg: 30, maxAvg: 70, + }, + { + name: "Gamma", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.GammaRandom(rnd, 2.0, 25.0) + }, + minAvg: 40, maxAvg: 60, + }, + 
} + + for _, dist := range distributions { + t.Run(dist.name, func(t *testing.T) { + result, err := gen.GenerateHistogram(input, dist.valueFunc) + + if err != nil { + t.Fatalf("Distribution %s failed: %v", dist.name, err) + } + + if result.Expected.Count != 1000 { + t.Errorf("Distribution %s: expected 1000 samples, got %d", dist.name, result.Expected.Count) + } + + if result.Expected.Average < dist.minAvg || result.Expected.Average > dist.maxAvg { + t.Errorf("Distribution %s: average %.2f outside expected range [%.1f-%.1f]", + dist.name, result.Expected.Average, dist.minAvg, dist.maxAvg) + } + }) + } +} + +// TestOTLPPublisher_Creation tests OTLP publisher creation +func TestOTLPPublisher_Creation(t *testing.T) { + t.Skip() + publisher := generator.NewOTLPPublisher("localhost:4318") + + if publisher == nil { + t.Fatal("Expected publisher to be created, got nil") + } + + // Test that publisher has the expected endpoint (this would require exposing the field or adding a getter) + // For now, just test that it was created successfully +} + +// TestGenerationOptions tests the generation options +func TestGenerationOptions(t *testing.T) { + // Test with seed + gen1 := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 123}) + gen2 := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 123}) + + input := generator.HistogramInput{Count: 100, Boundaries: []float64{50, 100}} + + result1, err1 := gen1.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 10) + }) + + result2, err2 := gen2.GenerateHistogram(input, func(rnd *rand.Rand, t time.Time) float64 { + return generator.NormalRandom(rnd, 75, 10) + }) + + if err1 != nil || err2 != nil { + t.Fatalf("Expected no errors, got %v, %v", err1, err2) + } + + // With same seed, results should be identical + if result1.Expected.Sum != result2.Expected.Sum { + t.Errorf("Expected identical sums with same seed, got %.2f vs %.2f", + 
result1.Expected.Sum, result2.Expected.Sum) + } +} + +// TestTimeBasedPatterns tests time-based value functions +func TestTimeBasedPatterns(t *testing.T) { + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 999}) + input := generator.HistogramInput{Count: 100, Boundaries: []float64{25, 50, 75, 100}} + + patterns := []struct { + name string + valueFunc func(*rand.Rand, time.Time) float64 + }{ + { + name: "Sinusoidal", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.SinusoidalValue(rnd, t, 20, 3600, 0, 50) + }, + }, + { + name: "Spiky", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.SpikyValue(rnd, 50, 100, 0.1) + }, + }, + { + name: "Trending", + valueFunc: func(rnd *rand.Rand, t time.Time) float64 { + return generator.TrendingValue(rnd, t, 40, 0.001, 5) + }, + }, + } + + for _, pattern := range patterns { + t.Run(pattern.name, func(t *testing.T) { + result, err := gen.GenerateHistogram(input, pattern.valueFunc) + + if err != nil { + t.Fatalf("Pattern %s failed: %v", pattern.name, err) + } + + if result.Expected.Count != 100 { + t.Errorf("Pattern %s: expected 100 samples, got %d", pattern.name, result.Expected.Count) + } + + // Test that we got reasonable values (not all zeros or infinities) + if result.Expected.Sum <= 0 || result.Expected.Average <= 0 { + t.Errorf("Pattern %s: got unreasonable values sum=%.2f, avg=%.2f", + pattern.name, result.Expected.Sum, result.Expected.Average) + } + }) + } +} + +// TestEdgeCases tests edge cases and error conditions +func TestEdgeCases(t *testing.T) { + gen := generator.NewHistogramGenerator(generator.GenerationOptions{Seed: 1}) + + t.Run("ZeroCount", func(t *testing.T) { + input := generator.HistogramInput{Count: 0} + result, err := gen.GenerateHistogram(input, nil) + + if err != nil { + t.Fatalf("Expected no error with zero count, got %v", err) + } + + // Should default to some reasonable count + if result.Expected.Count == 0 { + 
t.Error("Expected non-zero count when input count is 0") + } + }) + + t.Run("NilValueFunc", func(t *testing.T) { + input := generator.HistogramInput{Count: 10} + result, err := gen.GenerateHistogram(input, nil) + + if err != nil { + t.Fatalf("Expected no error with nil value func, got %v", err) + } + + if result.Expected.Count == 0 { + t.Error("Expected samples even with nil value function") + } + }) + + t.Run("EmptyBoundaries", func(t *testing.T) { + input := generator.HistogramInput{Count: 10, Boundaries: []float64{}} + result, err := gen.GenerateHistogram(input, nil) + + if err != nil { + t.Fatalf("Expected no error with empty boundaries, got %v", err) + } + + // Should generate default boundaries + if len(result.Input.Boundaries) == 0 { + t.Error("Expected default boundaries when none provided") + } + }) +} + +func TestGenerateAccuracyDataset(t *testing.T) { + rng := rand.New(rand.NewSource(0xFEEDBEEF)) + datapoints := make([]float64, 10000) + for i := range 10000 { + datapoints[i] = generator.LogNormalRandom(rng, -4.894, 1.176) + } + slices.Sort(datapoints) + for i, v := range datapoints { + if i > 0 { + fmt.Print(", ") + } + fmt.Printf("%.3e", v) + } + fmt.Println() +} + +// Helper function for absolute value +func abs(x float64) float64 { + if x < 0 { + return -x + } + return x +} + +// Helper function to create float64 pointers +func ptr(f float64) *float64 { + return &f +} diff --git a/cmd/generator/generator/generator.go b/cmd/generator/generator/generator.go new file mode 100644 index 0000000000000..86c6f5bd42480 --- /dev/null +++ b/cmd/generator/generator/generator.go @@ -0,0 +1,33 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +// Package generator provides histogram generation capabilities for OpenTelemetry metrics testing. +// This package is designed to generate realistic histogram data using various statistical distributions +// and can optionally publish the generated metrics to OTLP endpoints. 
+// +// The package is organized into several focused modules: +// - types.go: Core data structures and types +// - histogram_generator.go: Main histogram generation logic +// - distributions.go: Statistical distribution functions +// - otlp_publisher.go: OTLP endpoint publishing functionality +// +// Example usage: +// +// generator := NewHistogramGenerator(GenerationOptions{ +// Seed: 12345, +// Endpoint: "localhost:4318", +// }) +// +// result, err := generator.GenerateAndPublishHistograms( +// HistogramInput{ +// Count: 1000, +// Min: ptr(10.0), +// Max: ptr(200.0), +// Boundaries: []float64{25, 50, 75, 100, 150}, +// Attributes: map[string]string{"service.name": "test-service"}, +// }, +// func(rnd *rand.Rand, t time.Time) float64 { +// return NormalRandom(rnd, 50, 15) +// }, +// ) +package generator diff --git a/cmd/generator/generator/histogram_generator.go b/cmd/generator/generator/histogram_generator.go new file mode 100644 index 0000000000000..0912a7abbf95a --- /dev/null +++ b/cmd/generator/generator/histogram_generator.go @@ -0,0 +1,222 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package generator + +import ( + "math" + "math/rand" + "sort" + "time" +) + +// HistogramGenerator generates histogram test cases using statistical distributions +type HistogramGenerator struct { + rand *rand.Rand + endpoint string +} + +// NewHistogramGenerator creates a new histogram generator with deterministic seed +func NewHistogramGenerator(opt ...GenerationOptions) *HistogramGenerator { + var seed int64 = time.Now().UnixNano() + var endpoint string + + if len(opt) > 0 { + if opt[0].Seed != 0 { + seed = opt[0].Seed + } + endpoint = opt[0].Endpoint + } + + return &HistogramGenerator{ + rand: rand.New(rand.NewSource(seed)), + endpoint: endpoint, + } +} + +// GenerateHistogram generates histogram data from individual values using a value function +func (g *HistogramGenerator) GenerateHistogram(input HistogramInput, valueFunc 
func(*rand.Rand, time.Time) float64) (HistogramResult, error) { + timestamp := time.Now() + sampleCount := int(input.Count) + + if sampleCount <= 0 { + sampleCount = 1000 // default sample count + } + + // Generate individual values using the value function + values := make([]float64, sampleCount) + for i := 0; i < sampleCount; i++ { + if valueFunc != nil { + values[i] = valueFunc(g.rand, timestamp) + } else { + values[i] = g.rand.Float64() * 100 // default random value + } + } + + // Sort values to find min/max + sort.Float64s(values) + + // Calculate basic stats + var sum float64 + for _, v := range values { + sum += v + } + + generatedMin := values[0] + generatedMax := values[len(values)-1] + average := sum / float64(len(values)) + + // Determine final min/max values + var finalMin, finalMax float64 + if input.Min != nil { + finalMin = *input.Min + } else { + finalMin = generatedMin + } + if input.Max != nil { + finalMax = *input.Max + } else { + finalMax = generatedMax + } + + // Use provided boundaries or generate them based on min/max + boundaries := input.Boundaries + if len(boundaries) == 0 { + boundaries = generateBoundariesBetween(finalMin, finalMax, 10) + } + + counts := make([]uint64, len(boundaries)+1) + for _, value := range values { + bucketIndex := len(boundaries) // default to overflow bucket + for i, boundary := range boundaries { + if value <= boundary { + bucketIndex = i + break + } + } + counts[bucketIndex]++ + } + + // Calculate percentile ranges + percentileRanges := g.calculatePercentileRangesFromValues(values, boundaries) + + // Use input min/max if provided, otherwise use generated values + var resultMin, resultMax *float64 + if input.Min != nil { + resultMin = input.Min + } else { + resultMin = &generatedMin + } + if input.Max != nil { + resultMax = input.Max + } else { + resultMax = &generatedMax + } + + generatedInput := HistogramInput{ + Count: uint64(len(values)), + Sum: sum, + Min: resultMin, + Max: resultMax, + Boundaries: 
boundaries, + Counts: counts, + Attributes: input.Attributes, + } + + expected := ExpectedMetrics{ + Count: uint64(len(values)), + Sum: sum, + Average: average, + Min: resultMin, + Max: resultMax, + PercentileRanges: percentileRanges, + } + + return HistogramResult{ + Input: generatedInput, + Expected: expected, + }, nil +} + +// GenerateAndPublishHistograms generates and optionally publishes histogram data +func (g *HistogramGenerator) GenerateAndPublishHistograms(input HistogramInput, valueFunc func(*rand.Rand, time.Time) float64) (HistogramResult, error) { + res, err := g.GenerateHistogram(input, valueFunc) + if err != nil { + return HistogramResult{}, err + } + + if g.endpoint == "" { + return res, nil + } + + publisher := NewOTLPPublisher(g.endpoint) + err = publisher.SendHistogramMetric("TelemetryGen", res) + if err != nil { + return HistogramResult{}, err + } + + return res, nil +} + +// calculatePercentileRangesFromValues calculates percentile ranges for sorted values +func (g *HistogramGenerator) calculatePercentileRangesFromValues(sortedValues []float64, boundaries []float64) map[float64]PercentileRange { + percentiles := []float64{0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99} + ranges := make(map[float64]PercentileRange) + + for _, p := range percentiles { + index := int(p * float64(len(sortedValues))) + if index >= len(sortedValues) { + index = len(sortedValues) - 1 + } + + value := sortedValues[index] + + // Find which bucket this percentile value falls into + var low, high float64 + + // Check if value falls in any boundary bucket + bucketFound := false + for i, boundary := range boundaries { + if value <= boundary { + if i > 0 { + low = boundaries[i-1] + } else { + low = math.Inf(-1) + } + high = boundary + bucketFound = true + break + } + } + + // If not found in any boundary bucket, it's in the overflow bucket + if !bucketFound { + if len(boundaries) > 0 { + low = boundaries[len(boundaries)-1] + } else { + low = math.Inf(-1) + } + high = math.Inf(1) + } + 
+ ranges[p] = PercentileRange{Low: low, High: high} + } + + return ranges +} + +// generateBoundariesBetween creates evenly spaced boundaries between min and max +func generateBoundariesBetween(min, max float64, numBuckets int) []float64 { + if numBuckets <= 0 { + numBuckets = 10 + } + + boundaries := make([]float64, numBuckets-1) + step := (max - min) / float64(numBuckets) + + for i := 0; i < numBuckets-1; i++ { + boundaries[i] = min + float64(i+1)*step + } + + return boundaries +} diff --git a/cmd/generator/generator/otlp_publisher.go b/cmd/generator/generator/otlp_publisher.go new file mode 100644 index 0000000000000..231fded29ef7a --- /dev/null +++ b/cmd/generator/generator/otlp_publisher.go @@ -0,0 +1,74 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package generator + +import ( + "fmt" + + "github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen/pkg/metrics" +) + +// OTLPPublisher handles publishing metrics to OTLP endpoints using telemetrygen +type OTLPPublisher struct { + endpoint string +} + +// NewOTLPPublisher creates a new OTLP publisher using telemetrygen +func NewOTLPPublisher(endpoint string) *OTLPPublisher { + return &OTLPPublisher{ + endpoint: endpoint, + } +} + +// SendHistogramMetric sends a histogram metric using telemetrygen +func (p *OTLPPublisher) SendHistogramMetric(metricName string, result HistogramResult) error { + return p.SendMetric(metricName, "Histogram", 0) // Histogram value doesn't matter for telemetrygen +} + +// SendMetric sends a metric using telemetrygen with the specified type and value +func (p *OTLPPublisher) SendMetric(metricName string, metricType string, value float64) error { + // Create telemetrygen config + cfg := metrics.NewConfig() + cfg.CustomEndpoint = p.endpoint + cfg.UseHTTP = true + cfg.Insecure = true + cfg.NumMetrics = 1 + cfg.Rate = 1 + cfg.MetricName = metricName + + // Set metric type + switch metricType { + case "Sum": + cfg.MetricType = 
metrics.MetricTypeSum
+	case "Gauge":
+		cfg.MetricType = metrics.MetricTypeGauge
+	case "Histogram":
+		cfg.MetricType = metrics.MetricTypeHistogram
+	default:
+		cfg.MetricType = metrics.MetricTypeGauge // default to gauge
+	}
+
+	// Start the metrics generation
+	err := metrics.Start(cfg)
+	if err != nil {
+		return fmt.Errorf("failed to send metric via telemetrygen: %w", err) // %w keeps the cause inspectable via errors.Is/errors.As
+	}
+
+	return nil
+}
+
+// SendGaugeMetric sends a gauge metric
+func (p *OTLPPublisher) SendGaugeMetric(metricName string, value float64) error {
+	return p.SendMetric(metricName, "Gauge", value)
+}
+
+// SendSumMetric sends a sum/counter metric
+func (p *OTLPPublisher) SendSumMetric(metricName string, value float64) error {
+	return p.SendMetric(metricName, "Sum", value)
+}
+
+// SendHistogramMetricSimple sends a histogram metric (telemetrygen will generate histogram data)
+func (p *OTLPPublisher) SendHistogramMetricSimple(metricName string) error {
+	return p.SendMetric(metricName, "Histogram", 0)
+}
diff --git a/cmd/generator/generator/types.go b/cmd/generator/generator/types.go
new file mode 100644
index 0000000000000..6660fd684d52e
--- /dev/null
+++ b/cmd/generator/generator/types.go
@@ -0,0 +1,43 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package generator
+
+// PercentileRange represents a range for percentile calculations
+type PercentileRange struct {
+	Low  float64
+	High float64
+}
+
+// HistogramInput represents the input data for a histogram metric
+type HistogramInput struct {
+	Count      uint64
+	Sum        float64
+	Min        *float64
+	Max        *float64
+	Boundaries []float64
+	Counts     []uint64
+	Attributes map[string]string
+}
+
+// ExpectedMetrics represents the expected calculated metrics from histogram data
+type ExpectedMetrics struct {
+	Count            uint64
+	Sum              float64
+	Average          float64
+	Min              *float64
+	Max              *float64
+	PercentileRanges map[float64]PercentileRange
+}
+
+// HistogramResult combines input and expected metrics
+type HistogramResult struct {
+	
Input    HistogramInput
+	Expected ExpectedMetrics
+}
+
+// GenerationOptions configures histogram generation
+type GenerationOptions struct {
+	Seed     int64
+	Endpoint string
+}
diff --git a/cmd/generator/go.mod b/cmd/generator/go.mod
new file mode 100644
index 0000000000000..a0756ea3c6f41
--- /dev/null
+++ b/cmd/generator/go.mod
@@ -0,0 +1,48 @@
+module github.com/amazon-contributing/opentelemetry-collector-contrib/cmd/generator
+
+go 1.25.0
+
+// FIXME(review): user-specific absolute path breaks every other checkout and CI;
+// use a relative path (e.g. ../../pkg/aws) or drop the replace once pkg/aws is published.
+replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws => /local/home/dricross/workplace/classichistograms/opentelemetry-collector-contrib/pkg/aws
+
+require (
+	github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen v0.137.0
+	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws v0.0.0-00010101000000-000000000000
+	github.com/spf13/pflag v1.0.10
+	go.opentelemetry.io/otel v1.38.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.38.0
+	go.opentelemetry.io/otel/sdk v1.38.0
+	go.opentelemetry.io/otel/sdk/metric v1.38.0
+	go.uber.org/zap v1.27.0
+	golang.org/x/time v0.13.0
+)
+
+require (
+	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
+	github.com/hashicorp/go-version v1.7.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/lightstep/go-expohisto v1.0.0 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
+	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
+	go.opentelemetry.io/collector/featuregate v1.43.0 // indirect
+	go.opentelemetry.io/collector/pdata v1.43.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 // indirect
+	
go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.8.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + golang.org/x/net v0.46.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/text v0.30.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20251007200510-49b9836ed3ff // indirect + google.golang.org/grpc v1.76.0 // indirect + google.golang.org/protobuf v1.36.10 // indirect +) diff --git a/cmd/generator/go.sum b/cmd/generator/go.sum new file mode 100644 index 0000000000000..d071adc237d1e --- /dev/null +++ b/cmd/generator/go.sum @@ -0,0 +1,179 @@ +<<<<<<< HEAD +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +======= +github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8= +github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +>>>>>>> 18b7de606a (Add OpenTelemetry Histogram Generator (#366)) +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +<<<<<<< HEAD +github.com/gogo/protobuf v1.3.2 
h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= +github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= +github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/lightstep/go-expohisto v1.0.0 h1:UPtTS1rGdtehbbAF7o/dhkWLTDI73UifG8LbfQI7cA4= +github.com/lightstep/go-expohisto v1.0.0/go.mod h1:xDXD0++Mu2FOaItXtdDfksfgxfV0z1TMPa+e/EUd0cs= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod 
h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen v0.137.0 h1:+rU6PWPc7Jcy39hnf08pHq6DsDhqpF2NYpMFPUKEIgw= +github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen v0.137.0/go.mod h1:Ll36/tgcetzSkSQR4D9bFcCEXLqGvr82vuhuxoh7h2Q= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/collector/featuregate v1.43.0 h1:Aq8UR5qv1zNlbbkTyqv8kLJtnoQMq/sG1/jS9o1cCJI= +go.opentelemetry.io/collector/featuregate v1.43.0/go.mod h1:d0tiRzVYrytB6LkcYgz2ESFTv7OktRPQe0QEQcPt1L4= +go.opentelemetry.io/collector/pdata v1.43.0 
h1:zVkj2hcjiMLwX+QDDNwb7iTh3LBjNXKv2qPSgj1Rzb4= +go.opentelemetry.io/collector/pdata v1.43.0/go.mod h1:KsJzdDG9e5BaHlmYr0sqdSEKeEiSfKzoF+rdWU7J//w= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 h1:vl9obrcoWVKp/lwl8tRE33853I8Xru9HFbw/skNeLs8= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0/go.mod h1:GAXRxmLJcVM3u22IjTg74zWBrRCKq8BnOqUVLodpcpw= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.38.0 h1:Oe2z/BCg5q7k4iXC3cqJxKYg0ieRiOqF0cecFYdPTwk= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.38.0/go.mod h1:ZQM5lAJpOsKnYagGg/zV2krVqTtaVdYdDkhMoX6Oalg= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.8.0 h1:fRAZQDcAFHySxpJ1TwlA1cJ4tvcrw7nXl9xWWC8N5CE= +go.opentelemetry.io/proto/otlp v1.8.0/go.mod h1:tIeYOeNBU4cvmPqpaji1P+KbB4Oloai8wN4rWzRrFF0= +go.opentelemetry.io/proto/slim/otlp v1.8.0 h1:afcLwp2XOeCbGrjufT1qWyruFt+6C9g5SOuymrSPUXQ= +go.opentelemetry.io/proto/slim/otlp v1.8.0/go.mod h1:Yaa5fjYm1SMCq0hG0x/87wV1MP9H5xDuG/1+AhvBcsI= 
+go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.1.0 h1:Uc+elixz922LHx5colXGi1ORbsW8DTIGM+gg+D9V7HE= +go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.1.0/go.mod h1:VyU6dTWBWv6h9w/+DYgSZAPMabWbPTFTuxp25sM8+s0= +go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.1.0 h1:i8YpvWGm/Uq1koL//bnbJ/26eV3OrKWm09+rDYo7keU= +go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.1.0/go.mod h1:pQ70xHY/ZVxNUBPn+qUWPl8nwai87eWdqL3M37lNi9A= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +======= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 h1:X5VWvz21y3gzm9Nw/kaUeku/1+uBhcekkmy4IkffJww= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1/go.mod h1:Zanoh4+gvIgluNqcfMVTJueD4wSS5hT7zTt4Mrutd90= +github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen v0.135.0 h1:/PSf7CIVu//VV7zYeYhnOLIgMsrONH37XV2mzeVtjZk= +github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen v0.135.0/go.mod h1:RpbRtcf6cXpgn8aJOf6SvIuwGo2ycRUEI2HASamCjlw= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0 h1:zG8GlgXCJQd5BU98C0hZnBbElszTmUgCNCfYneaDL0A= 
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0/go.mod h1:hOfBCz8kv/wuq73Mx2H2QnWokh/kHZxkh6SNF2bdKtw= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.37.0 h1:9PgnL3QNlj10uGxExowIDIZu66aVBwWhXmbOp1pa6RA= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.37.0/go.mod h1:0ineDcLELf6JmKfuo0wvvhAVMuxWFYvkTin2iV4ydPQ= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= +go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= +go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= +go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= +>>>>>>> 18b7de606a (Add OpenTelemetry Histogram Generator (#366)) +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +<<<<<<< HEAD +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= 
+golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251007200510-49b9836ed3ff h1:A90eA31Wq6HOMIQlLfzFwzqGKBTuaVztYu/g8sn+8Zc= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251007200510-49b9836ed3ff/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A= +google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= 
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +======= +golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= +golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= +golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0 h1:0UOBWO4dC+e51ui0NFKSPbkHHiQ4TmrEfEZMLDyRmY8= +google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0/go.mod h1:8ytArBbtOy2xfht+y2fqKd5DRDJRUQhqbyEnQ4bDChs= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 h1:MAKi5q709QWfnkkpNQ0M12hYJ1+e8qYVDyowc4U1XZM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= +google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +>>>>>>> 18b7de606a (Add OpenTelemetry Histogram Generator (#366)) diff --git a/cmd/generator/histogram_generator.go b/cmd/generator/histogram_generator.go new file mode 100644 index 0000000000000..0912a7abbf95a --- /dev/null +++ b/cmd/generator/histogram_generator.go @@ -0,0 +1,222 @@ +// Copyright The OpenTelemetry Authors +// 
SPDX-License-Identifier: Apache-2.0 + +package generator + +import ( + "math" + "math/rand" + "sort" + "time" +) + +// HistogramGenerator generates histogram test cases using statistical distributions +type HistogramGenerator struct { + rand *rand.Rand + endpoint string +} + +// NewHistogramGenerator creates a new histogram generator with deterministic seed +func NewHistogramGenerator(opt ...GenerationOptions) *HistogramGenerator { + var seed int64 = time.Now().UnixNano() + var endpoint string + + if len(opt) > 0 { + if opt[0].Seed != 0 { + seed = opt[0].Seed + } + endpoint = opt[0].Endpoint + } + + return &HistogramGenerator{ + rand: rand.New(rand.NewSource(seed)), + endpoint: endpoint, + } +} + +// GenerateHistogram generates histogram data from individual values using a value function +func (g *HistogramGenerator) GenerateHistogram(input HistogramInput, valueFunc func(*rand.Rand, time.Time) float64) (HistogramResult, error) { + timestamp := time.Now() + sampleCount := int(input.Count) + + if sampleCount <= 0 { + sampleCount = 1000 // default sample count + } + + // Generate individual values using the value function + values := make([]float64, sampleCount) + for i := 0; i < sampleCount; i++ { + if valueFunc != nil { + values[i] = valueFunc(g.rand, timestamp) + } else { + values[i] = g.rand.Float64() * 100 // default random value + } + } + + // Sort values to find min/max + sort.Float64s(values) + + // Calculate basic stats + var sum float64 + for _, v := range values { + sum += v + } + + generatedMin := values[0] + generatedMax := values[len(values)-1] + average := sum / float64(len(values)) + + // Determine final min/max values + var finalMin, finalMax float64 + if input.Min != nil { + finalMin = *input.Min + } else { + finalMin = generatedMin + } + if input.Max != nil { + finalMax = *input.Max + } else { + finalMax = generatedMax + } + + // Use provided boundaries or generate them based on min/max + boundaries := input.Boundaries + if len(boundaries) == 0 { + 
boundaries = generateBoundariesBetween(finalMin, finalMax, 10) + } + + counts := make([]uint64, len(boundaries)+1) + for _, value := range values { + bucketIndex := len(boundaries) // default to overflow bucket + for i, boundary := range boundaries { + if value <= boundary { + bucketIndex = i + break + } + } + counts[bucketIndex]++ + } + + // Calculate percentile ranges + percentileRanges := g.calculatePercentileRangesFromValues(values, boundaries) + + // Use input min/max if provided, otherwise use generated values + var resultMin, resultMax *float64 + if input.Min != nil { + resultMin = input.Min + } else { + resultMin = &generatedMin + } + if input.Max != nil { + resultMax = input.Max + } else { + resultMax = &generatedMax + } + + generatedInput := HistogramInput{ + Count: uint64(len(values)), + Sum: sum, + Min: resultMin, + Max: resultMax, + Boundaries: boundaries, + Counts: counts, + Attributes: input.Attributes, + } + + expected := ExpectedMetrics{ + Count: uint64(len(values)), + Sum: sum, + Average: average, + Min: resultMin, + Max: resultMax, + PercentileRanges: percentileRanges, + } + + return HistogramResult{ + Input: generatedInput, + Expected: expected, + }, nil +} + +// GenerateAndPublishHistograms generates and optionally publishes histogram data +func (g *HistogramGenerator) GenerateAndPublishHistograms(input HistogramInput, valueFunc func(*rand.Rand, time.Time) float64) (HistogramResult, error) { + res, err := g.GenerateHistogram(input, valueFunc) + if err != nil { + return HistogramResult{}, err + } + + if g.endpoint == "" { + return res, nil + } + + publisher := NewOTLPPublisher(g.endpoint) + err = publisher.SendHistogramMetric("TelemetryGen", res) + if err != nil { + return HistogramResult{}, err + } + + return res, nil +} + +// calculatePercentileRangesFromValues calculates percentile ranges for sorted values +func (g *HistogramGenerator) calculatePercentileRangesFromValues(sortedValues []float64, boundaries []float64) 
map[float64]PercentileRange { + percentiles := []float64{0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99} + ranges := make(map[float64]PercentileRange) + + for _, p := range percentiles { + index := int(p * float64(len(sortedValues))) + if index >= len(sortedValues) { + index = len(sortedValues) - 1 + } + + value := sortedValues[index] + + // Find which bucket this percentile value falls into + var low, high float64 + + // Check if value falls in any boundary bucket + bucketFound := false + for i, boundary := range boundaries { + if value <= boundary { + if i > 0 { + low = boundaries[i-1] + } else { + low = math.Inf(-1) + } + high = boundary + bucketFound = true + break + } + } + + // If not found in any boundary bucket, it's in the overflow bucket + if !bucketFound { + if len(boundaries) > 0 { + low = boundaries[len(boundaries)-1] + } else { + low = math.Inf(-1) + } + high = math.Inf(1) + } + + ranges[p] = PercentileRange{Low: low, High: high} + } + + return ranges +} + +// generateBoundariesBetween creates evenly spaced boundaries between min and max +func generateBoundariesBetween(min, max float64, numBuckets int) []float64 { + if numBuckets <= 0 { + numBuckets = 10 + } + + boundaries := make([]float64, numBuckets-1) + step := (max - min) / float64(numBuckets) + + for i := 0; i < numBuckets-1; i++ { + boundaries[i] = min + float64(i+1)*step + } + + return boundaries +} diff --git a/cmd/generator/main.go b/cmd/generator/main.go new file mode 100644 index 0000000000000..8bbbada35495d --- /dev/null +++ b/cmd/generator/main.go @@ -0,0 +1,236 @@ +package main + +import ( + "context" + "errors" + "fmt" + "log" + "strconv" + "strings" + "time" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch/histograms" + "github.com/spf13/pflag" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + 
"go.opentelemetry.io/otel/sdk/resource" + semconv "go.opentelemetry.io/otel/semconv/v1.26.0" + "go.uber.org/zap" +) + +const ( + defaultGRPCEndpoint = "localhost:4317" + defaultHTTPEndpoint = "localhost:4318" +) + +var ( + errFormatOTLPAttributes = errors.New("value should be in one of the following formats: key=\"value\", key=true, key=false, or key=") + errDoubleQuotesOTLPAttributes = errors.New("value should be a string wrapped in double quotes") +) + +type KeyValue map[string]any + +var _ pflag.Value = (*KeyValue)(nil) + +func (*KeyValue) String() string { + return "" +} + +func (v *KeyValue) Set(s string) error { + kv := strings.SplitN(s, "=", 2) + if len(kv) != 2 { + return errFormatOTLPAttributes + } + val := kv[1] + if val == "true" { + (*v)[kv[0]] = true + return nil + } + if val == "false" { + (*v)[kv[0]] = false + return nil + } + if intVal, err := strconv.Atoi(val); err == nil { + (*v)[kv[0]] = intVal + return nil + } + if len(val) < 2 || !strings.HasPrefix(val, "\"") || !strings.HasSuffix(val, "\"") { + return errDoubleQuotesOTLPAttributes + } + + (*v)[kv[0]] = val[1 : len(val)-1] + return nil +} + +func (*KeyValue) Type() string { + return "map[string]any" +} + +// Config describes the test scenario. +type Config struct { + CustomEndpoint string + Insecure bool + UseHTTP bool + HTTPPath string + Headers KeyValue +} + +// Endpoint returns the appropriate endpoint URL based on the selected communication mode (gRPC or HTTP) +// or custom endpoint provided in the configuration. 
+func (c *Config) Endpoint() string { + if c.CustomEndpoint != "" { + return c.CustomEndpoint + } + if c.UseHTTP { + return defaultHTTPEndpoint + } + return defaultGRPCEndpoint +} + +func (c *Config) GetHeaders() map[string]string { + m := make(map[string]string, len(c.Headers)) + + for k, t := range c.Headers { + switch v := t.(type) { + case bool: + m[k] = strconv.FormatBool(v) + case string: + m[k] = v + } + } + + return m +} + +func main() { + + exporter, err := createExporter(&Config{ + UseHTTP: true, + Insecure: true, + }) + if err != nil { + log.Fatal(err) + } + + res := resource.NewWithAttributes(semconv.SchemaURL) + + startTime := time.Now() + + go func() { + testCases := histograms.TestCases() + ticker := time.NewTicker(time.Second * 10) + for range ticker.C { + for _, tc := range testCases { + metrics := []metricdata.Metrics{{ + Name: tc.Name, + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.DeltaTemporality, + DataPoints: []metricdata.HistogramDataPoint[float64]{ + tcToDatapoint(tc, startTime), + }, + }, + }} + rm := metricdata.ResourceMetrics{ + Resource: res, + ScopeMetrics: []metricdata.ScopeMetrics{{Metrics: metrics}}, + } + + if err := exporter.Export(context.Background(), &rm); err != nil { + log.Fatal("exporter failed", zap.Error(err)) + } + } + } + }() + + ticker := time.NewTicker(time.Second * 10) + testCases := histograms.InvalidTestCases() + for range ticker.C { + for _, tc := range testCases { + metrics := []metricdata.Metrics{{ + Name: tc.Name, + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.DeltaTemporality, + DataPoints: []metricdata.HistogramDataPoint[float64]{ + tcToDatapoint(tc, startTime), + }, + }, + }} + rm := metricdata.ResourceMetrics{ + Resource: res, + ScopeMetrics: []metricdata.ScopeMetrics{{Metrics: metrics}}, + } + + if err := exporter.Export(context.Background(), &rm); err != nil { + log.Fatal("exporter failed", zap.Error(err)) + } + } + } + +} + +func createExporter(cfg *Config) 
(sdkmetric.Exporter, error) { + var exp sdkmetric.Exporter + var err error + if cfg.UseHTTP { + var exporterOpts []otlpmetrichttp.Option + + log.Print("starting HTTP exporter") + exporterOpts, err = httpExporterOptions(cfg) + if err != nil { + return nil, err + } + exp, err = otlpmetrichttp.New(context.Background(), exporterOpts...) + if err != nil { + return nil, fmt.Errorf("failed to obtain OTLP HTTP exporter: %w", err) + } + } else { + return nil, fmt.Errorf("NotYetImplemented") + } + return exp, err +} + +// httpExporterOptions creates the configuration options for an HTTP-based OTLP metric exporter. +// It configures the exporter with the provided endpoint, URL path, connection security settings, and headers. +func httpExporterOptions(cfg *Config) ([]otlpmetrichttp.Option, error) { + httpExpOpt := []otlpmetrichttp.Option{ + otlpmetrichttp.WithEndpoint(cfg.Endpoint()), + otlpmetrichttp.WithURLPath(cfg.HTTPPath), + } + + if cfg.Insecure { + httpExpOpt = append(httpExpOpt, otlpmetrichttp.WithInsecure()) + } + + if len(cfg.Headers) > 0 { + httpExpOpt = append(httpExpOpt, otlpmetrichttp.WithHeaders(cfg.GetHeaders())) + } + + return httpExpOpt, nil +} + +func tcToDatapoint(tc histograms.HistogramTestCase, startTime time.Time) metricdata.HistogramDataPoint[float64] { + attrs := []attribute.KeyValue{} + for k, v := range tc.Input.Attributes { + attrs = append(attrs, attribute.String(k, v)) + } + + dp := metricdata.HistogramDataPoint[float64]{ + StartTime: startTime, + Time: time.Now(), + Attributes: attribute.NewSet(attrs...), + Count: tc.Input.Count, + Sum: tc.Input.Sum, + Bounds: tc.Input.Boundaries, + BucketCounts: tc.Input.Counts, + } + + if tc.Input.Min != nil { + dp.Min = metricdata.NewExtrema(*tc.Input.Min) + } + if tc.Input.Max != nil { + dp.Max = metricdata.NewExtrema(*tc.Input.Max) + } + return dp +} diff --git a/cmd/generator/otlp_publisher.go b/cmd/generator/otlp_publisher.go new file mode 100644 index 0000000000000..231fded29ef7a --- /dev/null +++ 
b/cmd/generator/otlp_publisher.go @@ -0,0 +1,74 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package generator + +import ( + "fmt" + + "github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen/pkg/metrics" +) + +// OTLPPublisher handles publishing metrics to OTLP endpoints using telemetrygen +type OTLPPublisher struct { + endpoint string +} + +// NewOTLPPublisher creates a new OTLP publisher using telemetrygen +func NewOTLPPublisher(endpoint string) *OTLPPublisher { + return &OTLPPublisher{ + endpoint: endpoint, + } +} + +// SendHistogramMetric sends a histogram metric using telemetrygen +func (p *OTLPPublisher) SendHistogramMetric(metricName string, result HistogramResult) error { + return p.SendMetric(metricName, "Histogram", 0) // Histogram value doesn't matter for telemetrygen +} + +// SendMetric sends a metric using telemetrygen with the specified type and value +func (p *OTLPPublisher) SendMetric(metricName string, metricType string, value float64) error { + // Create telemetrygen config + cfg := metrics.NewConfig() + cfg.CustomEndpoint = p.endpoint + cfg.UseHTTP = true + cfg.Insecure = true + cfg.NumMetrics = 1 + cfg.Rate = 1 + cfg.MetricName = metricName + + // Set metric type + switch metricType { + case "Sum": + cfg.MetricType = metrics.MetricTypeSum + case "Gauge": + cfg.MetricType = metrics.MetricTypeGauge + case "Histogram": + cfg.MetricType = metrics.MetricTypeHistogram + default: + cfg.MetricType = metrics.MetricTypeGauge // default to gauge + } + + // Start the metrics generation + err := metrics.Start(cfg) + if err != nil { + return fmt.Errorf("failed to send metric via telemetrygen: %v", err) + } + + return nil +} + +// SendGaugeMetric sends a gauge metric +func (p *OTLPPublisher) SendGaugeMetric(metricName string, value float64) error { + return p.SendMetric(metricName, "Gauge", value) +} + +// SendSumMetric sends a sum/counter metric +func (p *OTLPPublisher) SendSumMetric(metricName 
string, value float64) error { + return p.SendMetric(metricName, "Sum", value) +} + +// SendHistogramMetricSimple sends a histogram metric (telemetrygen will generate histogram data) +func (p *OTLPPublisher) SendHistogramMetricSimple(metricName string) error { + return p.SendMetric(metricName, "Histogram", 0) +} diff --git a/cmd/generator/types.go b/cmd/generator/types.go new file mode 100644 index 0000000000000..6660fd684d52e --- /dev/null +++ b/cmd/generator/types.go @@ -0,0 +1,43 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package generator + +// PercentileRange represents a range for percentile calculations +type PercentileRange struct { + Low float64 + High float64 +} + +// HistogramInput represents the input data for a histogram metric +type HistogramInput struct { + Count uint64 + Sum float64 + Min *float64 + Max *float64 + Boundaries []float64 + Counts []uint64 + Attributes map[string]string +} + +// ExpectedMetrics represents the expected calculated metrics from histogram data +type ExpectedMetrics struct { + Count uint64 + Sum float64 + Average float64 + Min *float64 + Max *float64 + PercentileRanges map[float64]PercentileRange +} + +// HistogramResult combines input and expected metrics +type HistogramResult struct { + Input HistogramInput + Expected ExpectedMetrics +} + +// GenerationOptions configures histogram generation +type GenerationOptions struct { + Seed int64 + Endpoint string +} diff --git a/cmd/promgen/__pycache__/metrics_pb2.cpython-37.pyc b/cmd/promgen/__pycache__/metrics_pb2.cpython-37.pyc new file mode 100644 index 0000000000000..2c27dbaac451c Binary files /dev/null and b/cmd/promgen/__pycache__/metrics_pb2.cpython-37.pyc differ diff --git a/cmd/promgen/go.mod b/cmd/promgen/go.mod new file mode 100644 index 0000000000000..29f03d410bd43 --- /dev/null +++ b/cmd/promgen/go.mod @@ -0,0 +1,23 @@ +module github.com/amazon-contributing/opentelemetry-collector-contrib/cmd/promgen + +go 1.25.0 + +replace 
github.com/amazon-contributing/opentelemetry-collector-contrib/share/testdata/histograms => ../../share/testdata/histograms

require (
	github.com/amazon-contributing/opentelemetry-collector-contrib/share/testdata/histograms v0.124.1
	github.com/prometheus/client_golang v1.23.2
	google.golang.org/protobuf v1.36.9
)

require (
	github.com/beorn7/perks v1.0.1 // indirect
	github.com/cespare/xxhash/v2 v2.3.0 // indirect
	github.com/kr/text v0.2.0 // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/prometheus/client_model v0.6.2 // indirect
	github.com/prometheus/common v0.66.1 // indirect
	github.com/prometheus/procfs v0.16.1 // indirect
	go.yaml.in/yaml/v2 v2.4.2 // indirect
	golang.org/x/sys v0.35.0 // indirect
)
diff --git a/cmd/promgen/go.sum b/cmd/promgen/go.sum
new file mode 100644
index 0000000000000..ef6be1bb660cd
--- /dev/null
+++ b/cmd/promgen/go.sum
@@ -0,0 +1,46 @@
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod 
h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= +google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/cmd/promgen/metrics.go b/cmd/promgen/metrics.go new file mode 100644 index 0000000000000..9c6707c1f942c --- /dev/null +++ b/cmd/promgen/metrics.go @@ -0,0 +1,248 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "math" + "math/rand" + "net/http" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "google.golang.org/protobuf/proto" +) + +// MetricType represents supported Prometheus metric types +type MetricType string + +const ( + TypeUntyped MetricType = "untyped" + TypeCounter MetricType = "counter" + TypeGauge MetricType = "gauge" + TypeSummary MetricType = "summary" + TypeHistogram MetricType = "histogram" + TypeNativeHistogram MetricType = "native_histogram" +) + +// MetricDefinition defines a metric and its 
time series.
type MetricDefinition struct {
	Name            string                                      // metric name as exposed to Prometheus
	Type            MetricType                                  // which collector defaultCollector builds
	Help            string                                      // help text attached to the metric
	Labels          []string                                    // label names for the metric vector
	CreateCollector func() (prometheus.Collector, error)        // optional custom collector factory
	Update          func(prometheus.Collector, time.Time) error // optional per-tick value updater
}

// Metric pairs a registered collector with its update callback.
type Metric struct {
	Collector prometheus.Collector
	Update    func(prometheus.Collector, time.Time) error
}

// Generator manages the metrics and their generation.
type Generator struct {
	metrics  map[string]Metric
	registry *prometheus.Registry
	mu       sync.RWMutex
	rand     *rand.Rand
}

var _ http.Handler = (*Generator)(nil)

// NewGenerator creates a new metrics generator with a deterministic RNG.
func NewGenerator() *Generator {
	return &Generator{
		metrics:  make(map[string]Metric),
		registry: prometheus.NewRegistry(),
		rand:     rand.New(rand.NewSource(0xFEEDBEEF)), // fixed seed for reproducible results
	}
}

// AddMetric registers a new metric definition with the generator, building a
// default collector when the definition does not supply its own factory.
func (g *Generator) AddMetric(def MetricDefinition) error {
	g.mu.Lock()
	defer g.mu.Unlock()

	var collector prometheus.Collector
	var err error
	if def.CreateCollector != nil {
		collector, err = def.CreateCollector()
	} else {
		collector, err = g.defaultCollector(def)
	}
	if err != nil {
		return fmt.Errorf("unable to create collector: %w", err)
	}

	if err := g.registry.Register(collector); err != nil {
		return fmt.Errorf("failed to register metric: %w", err)
	}

	g.metrics[def.Name] = Metric{
		Collector: collector,
		Update:    def.Update,
	}
	return nil
}

// UpdateMetrics invokes every metric's Update callback with the current
// timestamp, stopping at the first error.
func (g *Generator) UpdateMetrics(timestamp time.Time) error {
	g.mu.Lock()
	defer g.mu.Unlock()

	for name, m := range g.metrics {
		if m.Update == nil {
			continue
		}
		if err := m.Update(m.Collector, timestamp); err != nil {
			return fmt.Errorf("failed to update metric %s: %w", name, err)
		}
	}

	return nil
}

// ServeHTTP implements http.Handler, choosing the response encoding from the
// Accept header and falling back to the standard Prometheus text handler.
func (g *Generator) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	format := r.Header.Get("Accept")
	log.Printf("received %s request\n", format)
	// NOTE(review): exact matching ignores Accept parameters such as
	// "application/json; charset=utf-8" — confirm clients send bare types.
	switch format {
	case "application/json":
		g.serveJSON(w, r)
	case "application/vnd.google.protobuf":
		g.serveProtobuf(w, r)
	default:
		promhttp.HandlerFor(g.registry, promhttp.HandlerOpts{}).ServeHTTP(w, r)
	}
}

// serveJSON serves the gathered metric families as a JSON array.
func (g *Generator) serveJSON(w http.ResponseWriter, r *http.Request) {
	metrics, err := g.registry.Gather()
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(metrics); err != nil {
		// Headers are already sent; all we can do is log the failure.
		log.Printf("failed to encode metrics as JSON: %v", err)
	}
}

// serveProtobuf serves the gathered metric families in the Prometheus
// protobuf exposition format: a stream of varint-length-delimited
// MetricFamily messages.
func (g *Generator) serveProtobuf(w http.ResponseWriter, r *http.Request) {
	metrics, err := g.registry.Gather()
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Set the header once, before any body bytes are written; the old code
	// set it inside the loop, after the first Write had already fixed the
	// response headers.
	w.Header().Set("Content-Type", "application/vnd.google.protobuf")
	for _, mf := range metrics {
		data, err := proto.Marshal(mf)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		// Length-prefix each message so consumers can split the stream;
		// bare concatenated protobuf messages are not parseable.
		if _, err := w.Write(appendUvarint(nil, uint64(len(data)))); err != nil {
			log.Printf("failed to write metrics response: %v", err)
			return
		}
		if _, err := w.Write(data); err != nil {
			log.Printf("failed to write metrics response: %v", err)
			return
		}
	}
}

// appendUvarint appends v to buf in protobuf base-128 varint encoding.
func appendUvarint(buf []byte, v uint64) []byte {
	for v >= 0x80 {
		buf = append(buf, byte(v)|0x80)
		v >>= 7
	}
	return append(buf, byte(v))
}

// defaultCollector builds the standard collector for def.Type when the
// definition does not supply a custom factory.
func (g *Generator) defaultCollector(def MetricDefinition) (prometheus.Collector, error) {
	switch def.Type {
	case TypeUntyped:
		return prometheus.NewUntypedVec(
			prometheus.UntypedOpts{Name: def.Name, Help: def.Help},
			def.Labels,
		), nil
	case TypeCounter:
		return prometheus.NewCounterVec(
			prometheus.CounterOpts{Name: def.Name, Help: def.Help},
			def.Labels,
		), nil
	case TypeGauge:
		return prometheus.NewGaugeVec(
			prometheus.GaugeOpts{Name: def.Name, Help: def.Help},
			def.Labels,
		), nil
	case TypeHistogram:
		return prometheus.NewHistogramVec(
			prometheus.HistogramOpts{Name: def.Name, Help: def.Help},
			def.Labels,
		), nil
	case TypeSummary:
		return prometheus.NewSummaryVec(
			prometheus.SummaryOpts{
				Name:       def.Name,
				Help:       def.Help,
				Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
			},
			def.Labels,
		), nil
	case TypeNativeHistogram:
		return prometheus.NewHistogramVec(
			prometheus.HistogramOpts{
				Name:                         def.Name,
				Help:                         def.Help,
				NativeHistogramBucketFactor:  1.1,
				NativeHistogramZeroThreshold: 1e-6,
			},
			def.Labels,
		), nil
	default:
		return nil, fmt.Errorf("unsupported metric type: %s", def.Type)
	}
}

// GammaRandom generates a random number from a Gamma(shape, scale)
// distribution using Marsaglia and Tsang's rejection method.
func GammaRandom(rnd *rand.Rand, shape, scale float64) float64 {
	if shape < 1 {
		// Boost the shape by one and correct with U^(1/shape)
		// (standard transformation for shape < 1).
		return GammaRandom(rnd, shape+1, scale) * math.Pow(rnd.Float64(), 1.0/shape)
	}

	d := shape - 1.0/3.0
	c := 1.0 / math.Sqrt(9.0*d)

	for {
		var x, v float64
		for {
			x = rnd.NormFloat64()
			v = 1.0 + c*x
			if v > 0 {
				break
			}
		}

		v = v * v * v
		u := rnd.Float64()

		// Cheap squeeze test first; exact log-based test as fallback.
		if u < 1.0-0.331*math.Pow(x, 4) {
			return d * v * scale
		}
		if math.Log(u) < 0.5*x*x+d*(1.0-v+math.Log(v)) {
			return d * v * scale
		}
	}
}
diff --git a/cmd/promgen/metrics.proto b/cmd/promgen/metrics.proto
new file mode 100644
index 0000000000000..3e9168e66a5c4
--- /dev/null
+++ b/cmd/promgen/metrics.proto
@@ -0,0 +1,157 @@
// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +syntax = "proto2"; + +package io.prometheus.client; +option java_package = "io.prometheus.client"; +option go_package = "github.com/prometheus/client_model/go;io_prometheus_client"; + +import "google/protobuf/timestamp.proto"; + +message LabelPair { + optional string name = 1; + optional string value = 2; +} + +enum MetricType { + // COUNTER must use the Metric field "counter". + COUNTER = 0; + // GAUGE must use the Metric field "gauge". + GAUGE = 1; + // SUMMARY must use the Metric field "summary". + SUMMARY = 2; + // UNTYPED must use the Metric field "untyped". + UNTYPED = 3; + // HISTOGRAM must use the Metric field "histogram". + HISTOGRAM = 4; + // GAUGE_HISTOGRAM must use the Metric field "histogram". + GAUGE_HISTOGRAM = 5; +} + +message Gauge { + optional double value = 1; +} + +message Counter { + optional double value = 1; + optional Exemplar exemplar = 2; + + optional google.protobuf.Timestamp created_timestamp = 3; +} + +message Quantile { + optional double quantile = 1; + optional double value = 2; +} + +message Summary { + optional uint64 sample_count = 1; + optional double sample_sum = 2; + repeated Quantile quantile = 3; + + optional google.protobuf.Timestamp created_timestamp = 4; +} + +message Untyped { + optional double value = 1; +} + +message Histogram { + optional uint64 sample_count = 1; + optional double sample_count_float = 4; // Overrides sample_count if > 0. + optional double sample_sum = 2; + // Buckets for the conventional histogram. + repeated Bucket bucket = 3; // Ordered in increasing order of upper_bound, +Inf bucket is optional. + + optional google.protobuf.Timestamp created_timestamp = 15; + + // Everything below here is for native histograms (also known as sparse histograms). + // Native histograms are an experimental feature without stability guarantees. + + // schema defines the bucket schema. Currently, valid numbers are -4 <= n <= 8. 
+ // They are all for base-2 bucket schemas, where 1 is a bucket boundary in each case, and + // then each power of two is divided into 2^n logarithmic buckets. + // Or in other words, each bucket boundary is the previous boundary times 2^(2^-n). + // In the future, more bucket schemas may be added using numbers < -4 or > 8. + optional sint32 schema = 5; + optional double zero_threshold = 6; // Breadth of the zero bucket. + optional uint64 zero_count = 7; // Count in zero bucket. + optional double zero_count_float = 8; // Overrides sb_zero_count if > 0. + + // Negative buckets for the native histogram. + repeated BucketSpan negative_span = 9; + // Use either "negative_delta" or "negative_count", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 negative_delta = 10; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double negative_count = 11; // Absolute count of each bucket. + + // Positive buckets for the native histogram. + // Use a no-op span (offset 0, length 0) for a native histogram without any + // observations yet and with a zero_threshold of 0. Otherwise, it would be + // indistinguishable from a classic histogram. + repeated BucketSpan positive_span = 12; + // Use either "positive_delta" or "positive_count", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 positive_delta = 13; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double positive_count = 14; // Absolute count of each bucket. + + // Only used for native histograms. These exemplars MUST have a timestamp. + repeated Exemplar exemplars = 16; +} + +// A Bucket of a conventional histogram, each of which is treated as +// an individual counter-like time series by Prometheus. +message Bucket { + optional uint64 cumulative_count = 1; // Cumulative in increasing order. 
+ optional double cumulative_count_float = 4; // Overrides cumulative_count if > 0. + optional double upper_bound = 2; // Inclusive. + optional Exemplar exemplar = 3; +} + +// A BucketSpan defines a number of consecutive buckets in a native +// histogram with their offset. Logically, it would be more +// straightforward to include the bucket counts in the Span. However, +// the protobuf representation is more compact in the way the data is +// structured here (with all the buckets in a single array separate +// from the Spans). +message BucketSpan { + optional sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative). + optional uint32 length = 2; // Length of consecutive buckets. +} + +message Exemplar { + repeated LabelPair label = 1; + optional double value = 2; + optional google.protobuf.Timestamp timestamp = 3; // OpenMetrics-style. +} + +message Metric { + repeated LabelPair label = 1; + optional Gauge gauge = 2; + optional Counter counter = 3; + optional Summary summary = 4; + optional Untyped untyped = 5; + optional Histogram histogram = 7; + optional int64 timestamp_ms = 6; +} + +message MetricFamily { + optional string name = 1; + optional string help = 2; + optional MetricType type = 3; + repeated Metric metric = 4; + optional string unit = 5; +} diff --git a/cmd/promgen/metrics_pb2.py b/cmd/promgen/metrics_pb2.py new file mode 100644 index 0000000000000..c2e274be8b4fe --- /dev/null +++ b/cmd/promgen/metrics_pb2.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: metrics.proto +# Protobuf Python Version: 6.30.2 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 6, + 30, + 2, + '', + 'metrics.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rmetrics.proto\x12\x14io.prometheus.client\x1a\x1fgoogle/protobuf/timestamp.proto\"(\n\tLabelPair\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t\"\x16\n\x05Gauge\x12\r\n\x05value\x18\x01 \x01(\x01\"\x81\x01\n\x07\x43ounter\x12\r\n\x05value\x18\x01 \x01(\x01\x12\x30\n\x08\x65xemplar\x18\x02 \x01(\x0b\x32\x1e.io.prometheus.client.Exemplar\x12\x35\n\x11\x63reated_timestamp\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"+\n\x08Quantile\x12\x10\n\x08quantile\x18\x01 \x01(\x01\x12\r\n\x05value\x18\x02 \x01(\x01\"\x9c\x01\n\x07Summary\x12\x14\n\x0csample_count\x18\x01 \x01(\x04\x12\x12\n\nsample_sum\x18\x02 \x01(\x01\x12\x30\n\x08quantile\x18\x03 \x03(\x0b\x32\x1e.io.prometheus.client.Quantile\x12\x35\n\x11\x63reated_timestamp\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\x18\n\x07Untyped\x12\r\n\x05value\x18\x01 \x01(\x01\"\x91\x04\n\tHistogram\x12\x14\n\x0csample_count\x18\x01 \x01(\x04\x12\x1a\n\x12sample_count_float\x18\x04 \x01(\x01\x12\x12\n\nsample_sum\x18\x02 \x01(\x01\x12,\n\x06\x62ucket\x18\x03 \x03(\x0b\x32\x1c.io.prometheus.client.Bucket\x12\x35\n\x11\x63reated_timestamp\x18\x0f 
\x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0e\n\x06schema\x18\x05 \x01(\x11\x12\x16\n\x0ezero_threshold\x18\x06 \x01(\x01\x12\x12\n\nzero_count\x18\x07 \x01(\x04\x12\x18\n\x10zero_count_float\x18\x08 \x01(\x01\x12\x37\n\rnegative_span\x18\t \x03(\x0b\x32 .io.prometheus.client.BucketSpan\x12\x16\n\x0enegative_delta\x18\n \x03(\x12\x12\x16\n\x0enegative_count\x18\x0b \x03(\x01\x12\x37\n\rpositive_span\x18\x0c \x03(\x0b\x32 .io.prometheus.client.BucketSpan\x12\x16\n\x0epositive_delta\x18\r \x03(\x12\x12\x16\n\x0epositive_count\x18\x0e \x03(\x01\x12\x31\n\texemplars\x18\x10 \x03(\x0b\x32\x1e.io.prometheus.client.Exemplar\"\x89\x01\n\x06\x42ucket\x12\x18\n\x10\x63umulative_count\x18\x01 \x01(\x04\x12\x1e\n\x16\x63umulative_count_float\x18\x04 \x01(\x01\x12\x13\n\x0bupper_bound\x18\x02 \x01(\x01\x12\x30\n\x08\x65xemplar\x18\x03 \x01(\x0b\x32\x1e.io.prometheus.client.Exemplar\",\n\nBucketSpan\x12\x0e\n\x06offset\x18\x01 \x01(\x11\x12\x0e\n\x06length\x18\x02 \x01(\r\"x\n\x08\x45xemplar\x12.\n\x05label\x18\x01 \x03(\x0b\x32\x1f.io.prometheus.client.LabelPair\x12\r\n\x05value\x18\x02 \x01(\x01\x12-\n\ttimestamp\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\xbe\x02\n\x06Metric\x12.\n\x05label\x18\x01 \x03(\x0b\x32\x1f.io.prometheus.client.LabelPair\x12*\n\x05gauge\x18\x02 \x01(\x0b\x32\x1b.io.prometheus.client.Gauge\x12.\n\x07\x63ounter\x18\x03 \x01(\x0b\x32\x1d.io.prometheus.client.Counter\x12.\n\x07summary\x18\x04 \x01(\x0b\x32\x1d.io.prometheus.client.Summary\x12.\n\x07untyped\x18\x05 \x01(\x0b\x32\x1d.io.prometheus.client.Untyped\x12\x32\n\thistogram\x18\x07 \x01(\x0b\x32\x1f.io.prometheus.client.Histogram\x12\x14\n\x0ctimestamp_ms\x18\x06 \x01(\x03\"\x96\x01\n\x0cMetricFamily\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04help\x18\x02 \x01(\t\x12.\n\x04type\x18\x03 \x01(\x0e\x32 .io.prometheus.client.MetricType\x12,\n\x06metric\x18\x04 \x03(\x0b\x32\x1c.io.prometheus.client.Metric\x12\x0c\n\x04unit\x18\x05 
\x01(\t*b\n\nMetricType\x12\x0b\n\x07\x43OUNTER\x10\x00\x12\t\n\x05GAUGE\x10\x01\x12\x0b\n\x07SUMMARY\x10\x02\x12\x0b\n\x07UNTYPED\x10\x03\x12\r\n\tHISTOGRAM\x10\x04\x12\x13\n\x0fGAUGE_HISTOGRAM\x10\x05\x42R\n\x14io.prometheus.clientZ:github.com/prometheus/client_model/go;io_prometheus_client') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'metrics_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals['DESCRIPTOR']._loaded_options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\024io.prometheus.clientZ:github.com/prometheus/client_model/go;io_prometheus_client' + _globals['_METRICTYPE']._serialized_start=1814 + _globals['_METRICTYPE']._serialized_end=1912 + _globals['_LABELPAIR']._serialized_start=72 + _globals['_LABELPAIR']._serialized_end=112 + _globals['_GAUGE']._serialized_start=114 + _globals['_GAUGE']._serialized_end=136 + _globals['_COUNTER']._serialized_start=139 + _globals['_COUNTER']._serialized_end=268 + _globals['_QUANTILE']._serialized_start=270 + _globals['_QUANTILE']._serialized_end=313 + _globals['_SUMMARY']._serialized_start=316 + _globals['_SUMMARY']._serialized_end=472 + _globals['_UNTYPED']._serialized_start=474 + _globals['_UNTYPED']._serialized_end=498 + _globals['_HISTOGRAM']._serialized_start=501 + _globals['_HISTOGRAM']._serialized_end=1030 + _globals['_BUCKET']._serialized_start=1033 + _globals['_BUCKET']._serialized_end=1170 + _globals['_BUCKETSPAN']._serialized_start=1172 + _globals['_BUCKETSPAN']._serialized_end=1216 + _globals['_EXEMPLAR']._serialized_start=1218 + _globals['_EXEMPLAR']._serialized_end=1338 + _globals['_METRIC']._serialized_start=1341 + _globals['_METRIC']._serialized_end=1659 + _globals['_METRICFAMILY']._serialized_start=1662 + _globals['_METRICFAMILY']._serialized_end=1812 +# @@protoc_insertion_point(module_scope) diff --git a/cmd/promgen/promgen.go b/cmd/promgen/promgen.go new file mode 100644 index 
0000000000000..49e7455f5c84c --- /dev/null +++ b/cmd/promgen/promgen.go @@ -0,0 +1,242 @@ +package main + +import ( + "log" + "maps" + "math" + "net/http" + "slices" + "strings" + "time" + + "github.com/amazon-contributing/opentelemetry-collector-contrib/share/testdata/histograms" + "github.com/prometheus/client_golang/prometheus" +) + +const updatePeriod = time.Second + +func main() { + + start := time.Now() + generator := NewGenerator() + + untypedMetric := MetricDefinition{ + Name: "untyped_metric", + Type: TypeUntyped, + Help: "An untyped metric that always returns 42", + Update: func(collector prometheus.Collector, timestamp time.Time) error { + return nil + }, + } + + if err := generator.AddMetric(untypedMetric); err != nil { + log.Fatalf("unable to add metric: %v", err) + } + + monotonicCounter := MetricDefinition{ + Name: "monotonic_counter", + Type: TypeCounter, + Help: "A counter that increases forever", + Update: func(collector prometheus.Collector, timestamp time.Time) error { + counter, err := collector.(*prometheus.CounterVec).GetMetricWith(prometheus.Labels{}) + if err != nil { + return err + } + counter.Inc() + return nil + }, + } + + if err := generator.AddMetric(monotonicCounter); err != nil { + log.Fatalf("unable to add metric: %v", err) + } + + sinusoidalGauge := MetricDefinition{ + Name: "sinusoidal_gauge", + Type: TypeGauge, + Help: "A gauge that oscillates between -1 and 1", + Update: func(collector prometheus.Collector, timestamp time.Time) error { + gauge, err := collector.(*prometheus.GaugeVec).GetMetricWith(prometheus.Labels{}) + if err != nil { + return err + } + newVal := math.Sin(2 * math.Pi * float64(timestamp.Unix()) / 20) + gauge.Set(newVal) + return nil + }, + } + + if err := generator.AddMetric(sinusoidalGauge); err != nil { + log.Fatalf("unable to add metric: %v", err) + } + + gammaHistogram := MetricDefinition{ + Name: "gamma_histogram", + Type: TypeHistogram, + Help: "A histogram whose values follow a gamma distribution", + 
Update: func(collector prometheus.Collector, timestamp time.Time) error { + histogram, err := collector.(*prometheus.HistogramVec).GetMetricWith(prometheus.Labels{}) + if err != nil { + return err + } + numObservations := generator.rand.Int() % 10 + for range numObservations { + histogram.Observe(GammaRandom(generator.rand, 2.0, 2.0)) + } + return nil + }, + } + + if err := generator.AddMetric(gammaHistogram); err != nil { + log.Fatalf("unable to add metric: %v", err) + } + + exponentialSummary := MetricDefinition{ + Name: "exponential_summary", + Type: TypeSummary, + Help: "A summary whose values follow an exponential distribution", + Update: func(collector prometheus.Collector, timestamp time.Time) error { + summary, err := collector.(*prometheus.SummaryVec).GetMetricWith(prometheus.Labels{}) + if err != nil { + return err + } + numObservations := generator.rand.Int() % 10 + for range numObservations { + summary.Observe(generator.rand.ExpFloat64()) + } + return nil + }, + } + + if err := generator.AddMetric(exponentialSummary); err != nil { + log.Fatalf("unable to add metric: %v", err) + } + + gammaNativeHistogram := MetricDefinition{ + Name: "gamma_native_histogram", + Type: TypeNativeHistogram, + Help: "A native histogram whose values follow a gamma distribution", + Update: func(collector prometheus.Collector, timestamp time.Time) error { + histogram, err := collector.(*prometheus.HistogramVec).GetMetricWith(prometheus.Labels{}) + if err != nil { + return err + } + numObservations := generator.rand.Int() % 10 + for range numObservations { + histogram.Observe(GammaRandom(generator.rand, 2.0, 2.0)) + } + return nil + }, + } + + if err := generator.AddMetric(gammaNativeHistogram); err != nil { + log.Fatalf("unable to add metric: %v", err) + } + + testCases := histograms.TestCases() + for _, tc := range testCases { + tName := "tc_" + strings.ToLower(strings.ReplaceAll(tc.Name, " ", "_")) + tMetricDefinition := MetricDefinition{ + Name: tName, + Type: TypeHistogram, 
+ Help: tc.Name, + CreateCollector: func() (prometheus.Collector, error) { + // prometheus gives default buckets if boundaries is empty. we want one big bucket instead + boundaries := tc.Input.Boundaries + if len(boundaries) == 0 { + boundaries = []float64{math.Inf(1)} + } + + return prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: tName, + Help: "My first test case", + Buckets: boundaries, + }, + slices.Collect(maps.Keys(tc.Input.Attributes)), + ), nil + }, + Update: func(collector prometheus.Collector, timestamp time.Time) error { + // Only update once + if time.Since(start) > 2*updatePeriod { + return nil + } + histogram, err := collector.(*prometheus.HistogramVec).GetMetricWith(tc.Input.Attributes) + if err != nil { + return err + } + for _, v := range generateDatapoints(tc.Input) { + histogram.Observe(v) + } + return nil + }, + } + + if err := generator.AddMetric(tMetricDefinition); err != nil { + log.Fatalf("unable to add metric: %v", err) + } + } + + // Start updating metrics periodically + go func() { + ticker := time.NewTicker(updatePeriod) + defer ticker.Stop() + for t := range ticker.C { + if err := generator.UpdateMetrics(t); err != nil { + log.Printf("Error updating metrics: %v", err) + } + } + }() + + // Start HTTP server + http.Handle("/metrics", generator) + log.Printf("Starting server on :8080") + log.Fatal(http.ListenAndServe(":8080", nil)) +} + +func generateDatapoints(in histograms.HistogramInput) []float64 { + if in.Count == 0 { + return []float64{} + } + + dps := []float64{} + totalGenerated := 0.0 + + for i, count := range in.Counts { + if count == 0 { + continue + } + + var bucketValue float64 + if len(in.Boundaries) == 0 { + bucketValue = in.Sum / float64(in.Count) + } else if i == 0 { + if in.Min != nil { + bucketValue = (*in.Min + in.Boundaries[0]) / 2 + } else { + bucketValue = in.Boundaries[0] - 1 + } + } else if i < len(in.Boundaries) { + bucketValue = (in.Boundaries[i-1] + in.Boundaries[i]) / 2 + } else { + if in.Max != 
nil { + bucketValue = (in.Boundaries[len(in.Boundaries)-1] + *in.Max) / 2 + } else { + bucketValue = in.Boundaries[len(in.Boundaries)-1] + 1 + } + } + + for j := uint64(0); j < count; j++ { + dps = append(dps, bucketValue) + totalGenerated += bucketValue + } + } + + if len(dps) > 0 && totalGenerated != 0 && len(in.Boundaries) > 0 { + ratio := in.Sum / totalGenerated + for i := range dps { + dps[i] *= ratio + } + } + return dps +} diff --git a/cmd/promgen/proto-reader.py b/cmd/promgen/proto-reader.py new file mode 100644 index 0000000000000..cf7def5c5a077 --- /dev/null +++ b/cmd/promgen/proto-reader.py @@ -0,0 +1,32 @@ +import sys +from google.protobuf import text_format +# The following import will be based on your compiled proto file name +# For example, if your proto file is named "message.proto", it will generate "message_pb2.py" +import metrics_pb2 + +def read_proto_binary(binary_file_path, proto_class): + # Create an instance of your message class + message = proto_class() + + # Read the binary file + with open(binary_file_path, 'rb') as f: + message.ParseFromString(f.read()) + + # Print the message in human-readable format + print(text_format.MessageToString(message)) + +def main(): + if len(sys.argv) != 2: + print("Usage: python script.py ") + sys.exit(1) + + binary_file_path = sys.argv[1] + + try: + # Replace YourProtoClass with the actual class name from your compiled proto + read_proto_binary(binary_file_path, YourProtoClass) + except Exception as e: + print(f"Error: {e}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/cmd/promgen/protobuf.bin b/cmd/promgen/protobuf.bin new file mode 100644 index 0000000000000..474700965489a Binary files /dev/null and b/cmd/promgen/protobuf.bin differ diff --git a/go.mod b/go.mod index daff7f0ef1b51..35fae10821064 100644 --- a/go.mod +++ b/go.mod @@ -16,3 +16,33 @@ retract ( v0.65.0 v0.37.0 // Contains dependencies on v0.36.0 components, which should have been updated to v0.37.0. 
) + +require github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen v0.135.0 + +require ( + github.com/cenkalti/backoff/v5 v5.0.2 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 // indirect + github.com/spf13/pflag v1.0.10 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel v1.37.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.37.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/otel/sdk v1.37.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.37.0 // indirect + go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.opentelemetry.io/proto/otlp v1.7.1 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + golang.org/x/net v0.42.0 // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/text v0.27.0 // indirect + golang.org/x/time v0.12.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 // indirect + google.golang.org/grpc v1.75.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000000000..e158940b12a56 --- /dev/null +++ b/go.sum @@ -0,0 +1,83 @@ +github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8= +github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.2.2/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 h1:X5VWvz21y3gzm9Nw/kaUeku/1+uBhcekkmy4IkffJww= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1/go.mod h1:Zanoh4+gvIgluNqcfMVTJueD4wSS5hT7zTt4Mrutd90= +github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= +github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= 
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen v0.135.0 h1:/PSf7CIVu//VV7zYeYhnOLIgMsrONH37XV2mzeVtjZk= +github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen v0.135.0/go.mod h1:RpbRtcf6cXpgn8aJOf6SvIuwGo2ycRUEI2HASamCjlw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/collector/featuregate v1.41.0 h1:CL4UMsMQj35nMJC3/jUu8VvYB4MHirbAX4B0Z/fCVLY= +go.opentelemetry.io/collector/featuregate v1.41.0/go.mod h1:A72x92glpH3zxekaUybml1vMSv94BH6jQRn5+/htcjw= +go.opentelemetry.io/collector/pdata v1.41.0 h1:2zurAaY0FkURbLa1x7f7ag6HaNZYZKSmI4wgzDegLgo= +go.opentelemetry.io/collector/pdata v1.41.0/go.mod h1:h0OghaTYe4oRvLxK31Ny7gkyjJ1p8oniM5MiCzluQjc= +go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= +go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0 h1:zG8GlgXCJQd5BU98C0hZnBbElszTmUgCNCfYneaDL0A= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.37.0/go.mod h1:hOfBCz8kv/wuq73Mx2H2QnWokh/kHZxkh6SNF2bdKtw= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.37.0 
h1:9PgnL3QNlj10uGxExowIDIZu66aVBwWhXmbOp1pa6RA= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.37.0/go.mod h1:0ineDcLELf6JmKfuo0wvvhAVMuxWFYvkTin2iV4ydPQ= +go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= +go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= +go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= +go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= +go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= +go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= +go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= +golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= +golang.org/x/text v0.27.0/go.mod 
h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0 h1:0UOBWO4dC+e51ui0NFKSPbkHHiQ4TmrEfEZMLDyRmY8= +google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0/go.mod h1:8ytArBbtOy2xfht+y2fqKd5DRDJRUQhqbyEnQ4bDChs= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 h1:MAKi5q709QWfnkkpNQ0M12hYJ1+e8qYVDyowc4U1XZM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= +google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/aws/cloudwatch/histograms/conversion.go b/pkg/aws/cloudwatch/histograms/conversion.go new file mode 100644 index 0000000000000..2dfb1988592c3 --- /dev/null +++ b/pkg/aws/cloudwatch/histograms/conversion.go @@ -0,0 +1,320 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package histograms // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch/histograms" + +import ( + "math" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +type 
ExponentialMapping struct { + maximum float64 + minimum float64 + sampleCount float64 + sum float64 + values []float64 + counts []float64 +} + +var _ (cloudwatch.HistogramDataPoint) = (*ExponentialMapping)(nil) + +// ConvertOTelToCloudWatch converts an OpenTelemetry histogram datapoint to a CloudWatch histogram datapoint using +// exponential mapping +func ConvertOTelToCloudWatch(dp pmetric.HistogramDataPoint) cloudwatch.HistogramDataPoint { + // maximumInnerBucketCount is the maximum number of inner buckets that each outer bucket can be represented with + // + // A larger values increase the resolution at which the data is sub-sampled while also incurring additional memory + // allocation, processing time, and the maximum number of value/count pairs that are sent to CloudWatch which could + // cause a CloudWatch PutMetricData / PutLogEvent request to be split into multiple requests due to the 100/150 + // metric datapoint limit. + const maximumInnerBucketCount = 10 + + // No validations - assuming valid input histogram + + em := &ExponentialMapping{ + maximum: dp.Max(), + minimum: dp.Min(), + sampleCount: float64(dp.Count()), + sum: dp.Sum(), + } + + // bounds specifies the boundaries between buckets + // bucketCounts specifies the number of datapoints in each bucket + // there is always 1 more bucket count than there is boundaries + // len(bucketCounts) = len(bounds) + 1 + bounds := dp.ExplicitBounds() + lenBounds := bounds.Len() + bucketCounts := dp.BucketCounts() + lenBucketCounts := bucketCounts.Len() + + // Special case: no boundaries implies a single bucket + if lenBounds == 0 { + em.counts = append(em.counts, float64(bucketCounts.At(0))) // recall that len(bucketCounts) = len(bounds)+1 + if dp.HasMax() && dp.HasMin() { + em.values = append(em.values, em.minimum/2.0+em.maximum/2.0) + } else if dp.HasMax() { + em.values = append(em.values, em.maximum) // only data point we have is the maximum + } else if dp.HasMin() { + em.values = append(em.values, 
em.minimum) // only data point we have is the minimum + } else { + em.values = append(em.values, 0) // arbitrary value + } + return em + } + + // To create inner buckets, all outer buckets need to have defined boundaries. The first and last bucket use the + // min and max and their lower and upper bounds respectively. The min and max are optional on the OTel datapoint. + // When min and max are not defined, make some reasonable about about what the min/max could be + if !dp.HasMin() { + + // Find the first bucket which contains some data points. The min must be in that bucket + minBucketIdx := 0 + for i := 0; i < lenBucketCounts; i++ { + if bucketCounts.At(i) > 0 { + minBucketIdx = i + break + } + } + + // take the lower bound of the bucket. lower bound of bucket index n is boundary index n-1 + if minBucketIdx != 0 { + em.minimum = bounds.At(minBucketIdx - 1) + } else { + bucketWidth := 0.001 // arbitrary width - there's no information about this histogram to make an inference with if there are no bounds + if lenBounds > 1 { + bucketWidth = bounds.At(1) - bounds.At(0) + } + em.minimum = bounds.At(0) - bucketWidth + + // if all boundaries are positive, assume all data is positive. this covers use cases where Prometheus + // histogram metrics for non-zero values like request durations have their first bucket start at 0. for + // these metrics, a negative minimum will cause percentile metrics to be unavailable + if bounds.At(0) >= 0 { + em.minimum = max(em.minimum, 0.0) + } + } + + } + + if !dp.HasMax() { + + // Find the last bucket with some data in it. The max must be in that bucket + maxBucketIdx := lenBounds - 1 + for i := lenBucketCounts - 1; i >= 0; i-- { + if bucketCounts.At(i) > 0 { + maxBucketIdx = i + break + } + } + + // we want the upper bound of the bucket. 
the upper bound of bucket index n is boundary index n + if maxBucketIdx <= lenBounds-1 { + em.maximum = bounds.At(maxBucketIdx) + } else { + bucketWidth := 0.01 // arbitrary width - there's no information about this histogram to make an inference with + if lenBounds > 1 { + bucketWidth = bounds.At(lenBounds-1) - bounds.At(lenBounds-2) + } + em.maximum = bounds.At(lenBounds-1) + bucketWidth + } + + } + + // Pre-calculate total output size to avoid dynamic growth + totalOutputSize := 0 + for i := 0; i < lenBucketCounts; i++ { + sampleCount := bucketCounts.At(i) + if sampleCount > 0 { + totalOutputSize += int(min(sampleCount, maximumInnerBucketCount)) + } + } + if totalOutputSize == 0 { + // No samples in any bucket + return em + } + + em.values = make([]float64, 0, totalOutputSize) + em.counts = make([]float64, 0, totalOutputSize) + + for i := 0; i < lenBucketCounts; i++ { + sampleCount := int(bucketCounts.At(i)) + if sampleCount == 0 { + // No need to operate on a bucket with no samples + continue + } + + lowerBound := em.minimum + if i > 0 { + lowerBound = bounds.At(i - 1) + } + upperBound := em.maximum + if i < lenBucketCounts-1 { + upperBound = bounds.At(i) + } + + // This algorithm creates "inner buckets" between user-defined bucket based on the sample count, up to a + // maximum. A logarithmic ratio (named "magnitude") compares the density between the current bucket and the + // next bucket. This logarithmic ratio is used to decide how to spread samples amongst inner buckets. + // + // case 1: magnitude < 0 + // * What this means: Current bucket is denser than the next bucket -> density is decreasing. + // * What we do: Use inverse quadratic distribution to spread the samples. This allocates more samples towards + // the lower bound of the bucket. + // case 2: 0 <= magnitude < 1 + // * What this means: Current bucket and next bucket has similar densities -> density is not changing much. + // * What we do: Use inform distribution to spread the samples. 
Extra samples that can't be spread evenly are + // (arbitrarily) allocated towards the start of the bucket. + // case 3: 1 <= magnitude + // * What this means: Current bucket is less dense than the next bucket -> density is increasing. + // * What we do: Use quadratic distribution to spread the samples. This allocates more samples toward the end + // of the bucket. + // + // As a small optimization, we omit the logarithm invocation and change the thresholds. + ratio := 0.0 + if i < lenBucketCounts-1 { + nextSampleCount := bucketCounts.At(i + 1) + // If next bucket is empty, than density is surely decreasing + if nextSampleCount == 0 { + ratio = 0.0 + } else { + var nextUpperBound float64 + if i+1 == lenBucketCounts-1 { + nextUpperBound = em.maximum + } else { + nextUpperBound = bounds.At(i + 1) + } + + //currentBucketDensity := float64(sampleCount) / (upperBound - lowerBound) + //nextBucketDensity := float64(nextSampleCount) / (nextUpperBound - upperBound) + //ratio = nextBucketDensity / currentBucketDensity + + // the following calculations are the same but improves speed by ~1% in benchmark tests + denom := (nextUpperBound - upperBound) * float64(sampleCount) + numerator := (upperBound - lowerBound) * float64(nextSampleCount) + ratio = numerator / denom + } + } + + // innerBucketCount is how many "inner buckets" to spread the sample count amongst + innerBucketCount := min(sampleCount, maximumInnerBucketCount) + delta := (upperBound - lowerBound) / float64(innerBucketCount) + + if ratio < 1.0/math.E { // magnitude < 0: Use -yx^2 (inverse quadratic) + sigma := float64(sumOfSquares(innerBucketCount)) + epsilon := float64(sampleCount) / sigma + entryStart := len(em.counts) + + runningSum := 0 + for j := 0; j < innerBucketCount; j++ { + innerBucketSampleCount := epsilon * float64((j-innerBucketCount)*(j-innerBucketCount)) + innerBucketSampleCountAdjusted := int(math.Floor(innerBucketSampleCount)) + if innerBucketSampleCountAdjusted > 0 { + runningSum += 
innerBucketSampleCountAdjusted + em.values = append(em.values, lowerBound+delta*float64(j+1)) + em.counts = append(em.counts, float64(innerBucketSampleCountAdjusted)) + } + } + + // distribute the remainder towards the front + remainder := sampleCount - runningSum + // make sure there's room for the remainder + if len(em.counts) < entryStart+remainder { + em.counts = append(em.counts, make([]float64, remainder)...) + em.values = append(em.values, make([]float64, remainder)...) + } + for j := 0; j < remainder; j++ { + em.counts[entryStart] += 1 + entryStart += 1 + } + + } else if ratio < math.E { // 0 <= magnitude < 1: Use x + // Distribute samples evenly with integer counts + baseCount := sampleCount / innerBucketCount + remainder := sampleCount % innerBucketCount + for j := 1; j <= innerBucketCount; j++ { + count := baseCount + + // Distribute remainder to first few buckets + if j <= remainder { + count++ + } + em.values = append(em.values, lowerBound+delta*float64(j)) + em.counts = append(em.counts, float64(count)) + } + + } else { // magnitude >= 1: Use yx^2 (quadratic) + sigma := float64(sumOfSquares(innerBucketCount)) + epsilon := float64(sampleCount) / sigma + entryStart := len(em.counts) + + runningSum := 0 + for j := 1; j <= innerBucketCount; j++ { + innerBucketSampleCount := epsilon * float64(j*j) + innerBucketSampleCountAdjusted := int(math.Floor(innerBucketSampleCount)) + if innerBucketSampleCountAdjusted > 0 { + runningSum += innerBucketSampleCountAdjusted + em.values = append(em.values, lowerBound+delta*float64(j)) + em.counts = append(em.counts, float64(innerBucketSampleCountAdjusted)) + } + } + + // distribute the remainder towards the end + remainder := sampleCount - runningSum + // make sure there's room for the remainder + if len(em.counts) < entryStart+remainder { + em.counts = append(em.counts, make([]float64, remainder)...) + em.values = append(em.values, make([]float64, remainder)...) 
+ } + entryStart = len(em.counts) - 1 + for j := 0; j < remainder; j++ { + em.counts[entryStart] += 1 + entryStart -= 1 + } + } + + } + + // Move last entry to maximum if needed + if dp.HasMax() && len(em.values) > 0 { + lastIdx := len(em.values) - 1 + for i := lastIdx; i >= 0; i-- { + if em.counts[i] > 0 { + lastIdx = i + break + } + } + em.values[lastIdx] = em.maximum + em.values = em.values[:lastIdx+1] + em.counts = em.counts[:lastIdx+1] + } + + return em +} + +func (em *ExponentialMapping) ValuesAndCounts() ([]float64, []float64) { + return em.values, em.counts +} + +func (em *ExponentialMapping) Minimum() float64 { + return em.minimum +} + +func (em *ExponentialMapping) Maximum() float64 { + return em.maximum +} + +func (em *ExponentialMapping) SampleCount() float64 { + return em.sampleCount +} + +func (em *ExponentialMapping) Sum() float64 { + return em.sum +} + +// sumOfSquares is a closed form calculation of Σx^2, for 1 to n +func sumOfSquares(n int) int { + return n * (n + 1) * (2*n + 1) / 6 +} diff --git a/pkg/aws/cloudwatch/histograms/conversion_test.go b/pkg/aws/cloudwatch/histograms/conversion_test.go new file mode 100644 index 0000000000000..a2fa871c0fa83 --- /dev/null +++ b/pkg/aws/cloudwatch/histograms/conversion_test.go @@ -0,0 +1,303 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package histograms // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch/histograms" + +import ( + "encoding/csv" + "encoding/json" + "fmt" + "math" + "os" + "sort" + "strconv" + "strings" + "testing" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/aws/cloudwatch" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +var filenameReplacer = strings.NewReplacer( + " ", "_", + "/", "_", +) + +func TestWriteInputHistograms(t *testing.T) { + t.Skip("only used to create test data for visualization") + for _, 
tc := range TestCases() { + jsonData, err := json.MarshalIndent(tc.Input, "", " ") + require.NoError(t, err) + os.Mkdir("testdata/input", os.ModePerm) + require.NoError(t, os.WriteFile("testdata/input/"+filenameReplacer.Replace(tc.Name)+".json", jsonData, 0644)) + } +} + +func TestConvertOTelToCloudWatch(t *testing.T) { + + for _, tc := range TestCases() { + t.Run(tc.Name, func(t *testing.T) { + dp := setupDatapoint(tc.Input) + dist := ConvertOTelToCloudWatch(dp) + verifyDist(t, dist, tc.Expected) + + // uncomment next lines to write datapoint to JSON file for visual inspection + // use histogram_mappings.py to create graphs + os.Mkdir("testdata/exponential", os.ModePerm) + assert.NoError(t, writeValuesAndCountsToJson(dist, "testdata/exponential/"+filenameReplacer.Replace(tc.Name+".json"))) + }) + } + + t.Run("accuracy test - lognormal", func(t *testing.T) { + verifyDistAccuracy(t, ConvertOTelToCloudWatch, "testdata/lognormal_10000.csv") + }) + + t.Run("accuracy test - weibull", func(t *testing.T) { + verifyDistAccuracy(t, ConvertOTelToCloudWatch, "testdata/weibull_10000.csv") + }) + +} + +func BenchmarkLogNormal(b *testing.B) { + // arrange + boundaries := []float64{ + 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, + 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02, + 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.028, 0.029, 0.03, + 0.031, 0.032, 0.033, 0.034, 0.035, 0.036, 0.037, 0.038, 0.039, 0.04, + 0.041, 0.042, 0.043, 0.044, 0.045, 0.046, 0.047, 0.048, 0.049, 0.05, + 0.1, 0.2, + } + + data, err := loadCsvData("testdata/lognormal_10000.csv") + require.NoError(b, err) + require.Len(b, data, 10000) + + dp := createHistogramDatapointFromData(data, boundaries) + require.Equal(b, int(dp.Count()), 10000) + + b.Run("NewExponentialMappingCWFromOtel", func(b *testing.B) { + for i := 0; i < b.N; i++ { + dist := ConvertOTelToCloudWatch(dp) + values, counts := dist.ValuesAndCounts() + assert.NotNil(b, values) + 
assert.NotNil(b, counts) + } + }) + +} + +func BenchmarkWeibull(b *testing.B) { + // arrange + boundaries := []float64{ + 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, + 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02, + 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.028, 0.029, 0.03, + 0.031, 0.032, 0.033, 0.034, 0.035, 0.036, 0.037, 0.038, 0.039, 0.04, + 0.041, 0.042, 0.043, 0.044, 0.045, 0.046, 0.047, 0.048, 0.049, 0.05, + 0.1, 0.2, + } + + data, err := loadCsvData("testdata/weibull_10000.csv") + require.NoError(b, err) + require.Len(b, data, 10000) + + dp := createHistogramDatapointFromData(data, boundaries) + require.Equal(b, int(dp.Count()), 10000) + + b.Run("NewExponentialMappingCWFromOtel", func(b *testing.B) { + for i := 0; i < b.N; i++ { + dist := ConvertOTelToCloudWatch(dp) + values, counts := dist.ValuesAndCounts() + assert.NotNil(b, values) + assert.NotNil(b, counts) + } + }) + +} + +func setupDatapoint(input HistogramInput) pmetric.HistogramDataPoint { + dp := pmetric.NewHistogramDataPoint() + dp.SetCount(input.Count) + dp.SetSum(input.Sum) + if input.Min != nil { + dp.SetMin(*input.Min) + } + if input.Max != nil { + dp.SetMax(*input.Max) + } + dp.ExplicitBounds().FromRaw(input.Boundaries) + dp.BucketCounts().FromRaw(input.Counts) + return dp +} + +func verifyDist(t *testing.T, dist cloudwatch.HistogramDataPoint, expected ExpectedMetrics) { + + if expected.Min != nil { + assert.Equal(t, *expected.Min, dist.Minimum(), "min does not match expected") + } + if expected.Max != nil { + assert.Equal(t, *expected.Max, dist.Maximum(), "max does not match expected") + } + assert.Equal(t, int(expected.Count), int(dist.SampleCount()), "samplecount does not match expected") + assert.Equal(t, expected.Sum, dist.Sum(), "sum does not match expected") + + values, counts := dist.ValuesAndCounts() + + calculatedCount := 0.0 + for _, count := range counts { + calculatedCount += count + //fmt.Printf("%7.2f = %4d (%d)\n", 
values[i], int(counts[i]), calculatedCount) + } + assert.InDelta(t, float64(expected.Count), calculatedCount, 1e-6, "calculated count does not match expected") + + for p, r := range expected.PercentileRanges { + x := int(math.Round(float64(dist.SampleCount()) * p)) + + soFar := 0 + for i, count := range counts { + soFar += int(count) + if soFar >= x { + //fmt.Printf("Found p%.f at bucket %0.2f. Expected range: %+v\n", p*100, values[i], r) + assert.GreaterOrEqual(t, values[i], r.Low, "percentile %0.2f", p) + assert.LessOrEqual(t, values[i], r.High, "percentile %0.2f", p) + break + } + } + } +} + +func loadCsvData(filename string) ([]float64, error) { + file, err := os.Open(filename) + if err != nil { + return nil, err + } + defer file.Close() + + reader := csv.NewReader(file) + records, err := reader.ReadAll() + if err != nil { + return nil, err + } + + var data []float64 + for _, value := range records[0] { + f, err := strconv.ParseFloat(strings.TrimSpace(value), 64) + if err != nil { + return nil, err + } + data = append(data, f) + } + return data, nil +} + +func createHistogramDatapointFromData(data []float64, boundaries []float64) pmetric.HistogramDataPoint { + dp := pmetric.NewHistogramDataPoint() + + // Calculate basic stats + var sum float64 + min := math.Inf(1) + max := math.Inf(-1) + + for _, v := range data { + sum += v + if v < min { + min = v + } + if v > max { + max = v + } + } + + dp.SetCount(uint64(len(data))) + dp.SetSum(sum) + dp.SetMin(min) + dp.SetMax(max) + + // Create bucket counts + bucketCounts := make([]uint64, len(boundaries)+1) + + for _, v := range data { + bucket := sort.SearchFloat64s(boundaries, v) + bucketCounts[bucket]++ + } + + dp.ExplicitBounds().FromRaw(boundaries) + dp.BucketCounts().FromRaw(bucketCounts) + + return dp +} + +func verifyDistAccuracy(t *testing.T, newDistFunc func(pmetric.HistogramDataPoint) cloudwatch.HistogramDataPoint, filename string) { + // arrange + percentiles := []float64{0.1, 0.25, 0.5, 0.75, 0.9, 0.99, 
0.999} + boundaries := []float64{ + 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, + 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02, + 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.028, 0.029, 0.03, + 0.031, 0.032, 0.033, 0.034, 0.035, 0.036, 0.037, 0.038, 0.039, 0.04, + 0.041, 0.042, 0.043, 0.044, 0.045, 0.046, 0.047, 0.048, 0.049, 0.05, + 0.1, 0.2, + } + + data, err := loadCsvData(filename) + require.NoError(t, err) + assert.Len(t, data, 10000) + + dp := createHistogramDatapointFromData(data, boundaries) + assert.Equal(t, int(dp.Count()), 10000) + calculatedTotal := 0 + for _, count := range dp.BucketCounts().All() { + calculatedTotal += int(count) + } + assert.Equal(t, calculatedTotal, 10000) + + // act + dist := newDistFunc(dp) + values, counts := dist.ValuesAndCounts() + + // assert + calculatedCount := 0.0 + for _, count := range counts { + calculatedCount += count + } + assert.InDelta(t, 10000, calculatedCount, 1e-6, "calculated count does not match expected") + + for _, p := range percentiles { + x1 := int(math.Round(float64(dp.Count()) * p)) + x2 := int(math.Round(calculatedCount * p)) + + exactPercentileValue := data[x1] + + soFar := 0 + for i, count := range counts { + soFar += int(count) + if soFar >= x2 { + calculatedPercentileValue := values[i] + errorPercent := (exactPercentileValue - calculatedPercentileValue) / exactPercentileValue * 100 + fmt.Printf("P%.1f: exact=%.6f, calculated=%.6f, error=%.2f%%\n", p*100, exactPercentileValue, calculatedPercentileValue, errorPercent) + break + } + + } + + } +} + +func writeValuesAndCountsToJson(dist cloudwatch.HistogramDataPoint, filename string) error { + values, counts := dist.ValuesAndCounts() + + data := make(map[string]any) + data["values"] = values + data["counts"] = counts + data["sum"] = dist.Sum() + + jsonData, err := json.MarshalIndent(data, "", " ") + if err != nil { + return err + } + + return os.WriteFile(filename, jsonData, 0644) +} diff --git 
a/pkg/aws/cloudwatch/histograms/test_cases_test.go b/pkg/aws/cloudwatch/histograms/test_cases_test.go index 5aad6d90996d8..920f835a1a263 100644 --- a/pkg/aws/cloudwatch/histograms/test_cases_test.go +++ b/pkg/aws/cloudwatch/histograms/test_cases_test.go @@ -96,6 +96,40 @@ func checkFeasibility(histogramInput HistogramInput) (bool, string) { } } + if hi.Max != nil { + if math.IsNaN(*hi.Max) { + return false, "max is NaN" + } + if math.IsInf(*hi.Max, 0) { + return false, "max is +/-inf" + } + } + + if hi.Max != nil { + if math.IsNaN(*hi.Min) { + return false, "min is NaN" + } + if math.IsInf(*hi.Min, 0) { + return false, "min is +/-inf" + } + } + + if math.IsNaN(hi.Sum) { + return false, "sum is NaN" + } + if math.IsInf(hi.Sum, 0) { + return false, "sum is +/-inf" + } + + for _, bound := range hi.Boundaries { + if math.IsNaN(bound) { + return false, "boundary is NaN" + } + if math.IsInf(bound, 0) { + return false, "boundary is +/-inf" + } + } + // Rest of checks only apply if we have boundaries/counts if lenBoundaries > 0 || lenCounts > 0 { // Check boundaries are in ascending order diff --git a/pkg/aws/cloudwatch/histograms/testdata/.gitignore b/pkg/aws/cloudwatch/histograms/testdata/.gitignore new file mode 100644 index 0000000000000..0f0d6bdaf849c --- /dev/null +++ b/pkg/aws/cloudwatch/histograms/testdata/.gitignore @@ -0,0 +1,3 @@ +comparisons/ +exponential/ +input/ \ No newline at end of file diff --git a/pkg/aws/cloudwatch/histograms/testdata/histogram_mappings.py b/pkg/aws/cloudwatch/histograms/testdata/histogram_mappings.py new file mode 100644 index 0000000000000..6fa92abf6be25 --- /dev/null +++ b/pkg/aws/cloudwatch/histograms/testdata/histogram_mappings.py @@ -0,0 +1,169 @@ +import argparse +import json +import math +import matplotlib.pyplot as plt +import numpy as np +import os +import pdb + +from pathlib import Path +from typing import Dict, List, Tuple + +def plot_input_histogram(data, ax, title: str, color: str): + """Plot input histogram using exact 
bucket boundaries.""" + boundaries = data.get('Boundaries', []) + counts = data['Counts'] + min_val = data.get('Min') + max_val = data.get('Max') + summ = data.get('Sum') + total_count = sum(counts) + + # Handle case with no boundaries (single bucket) + if not boundaries or len(boundaries) == 0: + if min_val is not None and max_val is not None: + left_edges = [min_val] + widths = [max_val - min_val] + else: + # Use arbitrary range if no min/max + left_edges = [-10] + widths = [20] + else: + # Calculate exact bucket edges and widths + left_edges = [] + widths = [] + + for i in range(len(counts)): + if i == 0: + # First bucket: from min to first boundary + left = min_val if min_val is not None else boundaries[0] - (boundaries[1] - boundaries[0]) if len(boundaries) > 1 else boundaries[0] - 10 + right = boundaries[0] + elif i == len(counts) - 1: + # Last bucket: from last boundary to max + left = boundaries[i-1] + right = max_val if max_val is not None else boundaries[i-1] + (boundaries[i-1] - boundaries[i-2]) if len(boundaries) > 1 else boundaries[i-1] + 10 + else: + # Middle buckets: between boundaries + left = boundaries[i-1] + right = boundaries[i] + + left_edges.append(left) + widths.append(right - left) + + ax.bar(left_edges, counts, width=widths, alpha=0.7, edgecolor='black', linewidth=0.8, color=color, align='edge') + ax.set_title(f'{title} (Count: {total_count}, Sum: {summ})') + ax.set_ylabel('Counts') + ax.grid(True, alpha=0.3) + +def plot_cw_histogram_bars(histogram: Dict[float, float], histogram_min: float, histogram_max: float, histogram_sum: float, ax, title: str, color: str): + """Plot histogram bars on given axes.""" + values = sorted(histogram.keys()) + counts = [histogram[v] for v in values] + total_count = sum(counts) + + if len(values) == 1: + # Single bar case + width = (histogram_max - histogram_min) * 0.8 + ax.bar(values, counts, width=width, alpha=0.7, edgecolor='black', linewidth=1.5, color=color) + else: + # Calculate minimum gap to prevent 
overlaps + gaps = [values[i+1] - values[i] for i in range(len(values)-1)] + min_gap = min(gaps) + max_width = min_gap * 0.8 # Use 80% of minimum gap + + widths = [] + for i in range(len(values)): + if i == 0: + # First bar: extend to histogram_min or use half-gap to next + left_space = values[0] - histogram_min + right_space = (values[1] - values[0]) / 2 if len(values) > 1 else (histogram_max - values[0]) + width = min(left_space + right_space, max_width) + elif i == len(values) - 1: + # Last bar: extend to histogram_max or use half-gap from previous + left_space = (values[i] - values[i-1]) / 2 + right_space = histogram_max - values[i] + width = min(left_space + right_space, max_width) + else: + # Middle bars: use half-gaps on both sides + left_space = (values[i] - values[i-1]) / 2 + right_space = (values[i+1] - values[i]) / 2 + width = min(left_space + right_space, max_width) + + widths.append(width) + + ax.bar(values, counts, width=widths, alpha=0.7, edgecolor='black', linewidth=0.8, color=color) + + ax.scatter(values, counts, color='red', s=50, zorder=5) + ax.set_title(f'{title} (Count: {total_count}, Sum: {histogram_sum})') + ax.set_ylabel('Counts') + ax.grid(True, alpha=0.3) + +def load_json_data(filepath): + """Load histogram data from JSON file.""" + with open(filepath, 'r') as f: + data = json.load(f) + return data['values'], data['counts'], data['sum'] + +def load_input_histogram(filepath): + """Load input histogram format.""" + with open(filepath, 'r') as f: + data = json.load(f) + return data + +def plot_all_folders_comparison(json_filename): + """Plot the same JSON file from all folders for comparison.""" + base_path = Path('.') + folders = ['input', 'exponential'] + colors = ['black', 'green'] + + fig, ax = plt.subplots(len(folders), 1, figsize=(12, 20)) + + i = -1 + for folder, color in zip(folders, colors): + i += 1 + filepath = base_path / folder / (json_filename+".json") + if filepath.exists(): + try: + if folder == 'input': + data = 
load_input_histogram(filepath) + plot_input_histogram(data, ax[i], f'{folder.capitalize()} Mapping', color) + else: + values, counts, summ = load_json_data(filepath) + if not values: # Skip if no values + continue + hist = {values[j]: counts[j] for j in range(len(values))} + plot_cw_histogram_bars(hist, min(values), max(values), summ, ax[i], f'{folder.capitalize()} Mapping', color) + except Exception as e: + print(f"Error processing {filepath}: {e}") + + plt.tight_layout() + plt.savefig(f"comparisons/{json_filename}.png", dpi=300, bbox_inches='tight') + plt.show() + +# Example usage +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Process histogram mappings') + parser.add_argument('dataset', nargs='?', help='Optional dataset name to process') + args = parser.parse_args() + + os.makedirs('comparisons', exist_ok=True) + + input_path = Path('./input') + if input_path.exists(): + if args.dataset: + # Process specific dataset if provided + dataset_file = input_path / f"{args.dataset}.json" + if dataset_file.exists(): + print(f"Processing {args.dataset}...") + plot_all_folders_comparison(args.dataset) + else: + print(f"Dataset '{args.dataset}' not found in input folder.") + else: + # Process all datasets if no specific dataset provided + json_files = [f.stem for f in input_path.iterdir() if f.suffix == '.json'] + for json_file in json_files: + print(f"Processing {json_file}...") + plot_all_folders_comparison(json_file) + else: + print("Input folder not found.") + + diff --git a/pkg/aws/tools/generator.go b/pkg/aws/tools/generator.go index ff5e44c78389f..698e109d936dc 100644 --- a/pkg/aws/tools/generator.go +++ b/pkg/aws/tools/generator.go @@ -86,20 +86,6 @@ func generateSamples(config DistributionConfig, rng *rand.Rand) []float64 { return data } -// gammaRandom generates a random sample from a gamma distribution with the given shape parameter. -// It uses the Marsaglia and Tsang method (2000) for efficient gamma random number generation. 
-// -// For shape < 1, it uses the transformation property: if X ~ Gamma(shape+1, 1), then -// X * U^(1/shape) ~ Gamma(shape, 1) where U ~ Uniform(0,1). -// -// For shape >= 1, it uses the squeeze acceptance method which is highly efficient -// with an acceptance rate > 95% for most shape values. -// -// Parameters: -// - shape: the shape parameter (α) of the gamma distribution, must be > 0 -// - rng: random number generator for sampling -// -// Returns: a random sample from Gamma(shape, 1) distribution func gammaRandom(shape float64, rng *rand.Rand) float64 { if shape < 1 { return gammaRandom(shape+1, rng) * math.Pow(rng.Float64(), 1/shape)