feat(errors): Add error upsampling aggregation functions with feature flag

yuvmen · yuvmen · commit f63e995250f4 · 2025-07-18T15:40:48.000-07:00
Allow projects with error upsampling enabled to use the new sample_count(), sample_eps() and
sample_epm() function columns in Discover, which return the non-extrapolated (raw sampled) values of count(), eps() and epm().
diff --git a/src/sentry/api/bases/organization_events.py b/src/sentry/api/bases/organization_events.py
@@ -435,26 +435,24 @@ def handle_error_upsampling(self, project_ids: Sequence[int], results: dict[str,
             data = results.get("data", [])
             fields_meta = results.get("meta", {}).get("fields", {})
 
-            for result in data:
-                if "count" in result:
-                    result["count()"] = result["count"]
-                    del result["count"]
-                if "eps" in result:
-                    result["eps()"] = result["eps"]
-                    del result["eps"]
-                if "epm" in result:
-                    result["epm()"] = result["epm"]
-                    del result["epm"]
-
-            if "count" in fields_meta:
-                fields_meta["count()"] = fields_meta["count"]
-                del fields_meta["count"]
-            if "eps" in fields_meta:
-                fields_meta["eps()"] = fields_meta["eps"]
-                del fields_meta["eps"]
-            if "epm" in fields_meta:
-                fields_meta["epm()"] = fields_meta["epm"]
-                del fields_meta["epm"]
+            upsampling_affected_functions = [
+                "count",
+                "eps",
+                "epm",
+                "sample_count",
+                "sample_eps",
+                "sample_epm",
+            ]
+            for function in upsampling_affected_functions:
+                for result in data:
+                    if function in result:
+                        result[f"{function}()"] = result[function]
+                        del result[function]
+
+            for function in upsampling_affected_functions:
+                if function in fields_meta:
+                    fields_meta[f"{function}()"] = fields_meta[function]
+                    del fields_meta[function]
 
     def handle_issues(
         self, results: Sequence[Any], project_ids: Sequence[int], organization: Organization
diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py
@@ -55,12 +55,16 @@ def transform_query_columns_for_error_upsampling(query_columns: Sequence[str]) -
 
         if column_lower == "count()":
             transformed_columns.append("upsampled_count() as count")
-
         elif column_lower == "eps()":
             transformed_columns.append("upsampled_eps() as eps")
-
         elif column_lower == "epm()":
             transformed_columns.append("upsampled_epm() as epm")
+        elif column_lower == "sample_count()":
+            transformed_columns.append("count() as sample_count")
+        elif column_lower == "sample_eps()":
+            transformed_columns.append("eps() as sample_eps")
+        elif column_lower == "sample_epm()":
+            transformed_columns.append("epm() as sample_epm")
         else:
             transformed_columns.append(column)
 
diff --git a/tests/snuba/api/endpoints/test_organization_events.py b/tests/snuba/api/endpoints/test_organization_events.py
@@ -6822,6 +6822,161 @@ def test_error_upsampling_with_partial_allowlist(self):
             # Expect upsampling since any project is allowlisted (both events upsampled: 10 + 10 = 20)
             assert response.data["data"][0]["count()"] == 20
 
+    def test_sample_count_with_allowlisted_project(self):
+        """Test that sample_count() returns raw sample count (not upsampled) for allowlisted projects."""
+        # Set up allowlisted project
+        with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}):
+            # Store error event with error_sampling context
+            self.store_event(
+                data={
+                    "event_id": "a" * 32,
+                    "message": "Error event for sample_count",
+                    "type": "error",
+                    "exception": [{"type": "ValueError", "value": "Something went wrong"}],
+                    "timestamp": self.ten_mins_ago_iso,
+                    "fingerprint": ["group1"],
+                    "contexts": {"error_sampling": {"client_sample_rate": 0.1}},
+                },
+                project_id=self.project.id,
+            )
+
+            # Store error event without error_sampling context (sample_weight = null should count as 1)
+            self.store_event(
+                data={
+                    "event_id": "a1" * 16,
+                    "message": "Error event without sampling",
+                    "type": "error",
+                    "exception": [{"type": "ValueError", "value": "Something else went wrong"}],
+                    "timestamp": self.ten_mins_ago_iso,
+                    "fingerprint": ["group1_no_sampling"],
+                },
+                project_id=self.project.id,
+            )
+
+            # Test with errors dataset - sample_count() should return raw count, not upsampled
+            query = {
+                "field": ["sample_count()"],
+                "statsPeriod": "2h",
+                "query": "event.type:error",
+                "dataset": "errors",
+            }
+            response = self.do_request(query)
+            assert response.status_code == 200, response.content
+            # Expect sample_count to return raw count: 2 events (not upsampled 11)
+            assert response.data["data"][0]["sample_count()"] == 2
+
+            # Check meta information
+            meta = response.data["meta"]
+            assert "fields" in meta
+            assert "sample_count()" in meta["fields"]
+            assert meta["fields"]["sample_count()"] == "integer"
+
+    def test_sample_eps_with_allowlisted_project(self):
+        """Test that sample_eps() returns raw sample rate (not upsampled) for allowlisted projects."""
+        # Set up allowlisted project
+        with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}):
+            # Store error event with error_sampling context
+            self.store_event(
+                data={
+                    "event_id": "b" * 32,
+                    "message": "Error event for sample_eps",
+                    "type": "error",
+                    "exception": [{"type": "ValueError", "value": "Something went wrong"}],
+                    "timestamp": self.ten_mins_ago_iso,
+                    "fingerprint": ["group2"],
+                    "contexts": {"error_sampling": {"client_sample_rate": 0.1}},
+                },
+                project_id=self.project.id,
+            )
+
+            # Store error event without error_sampling context (sample_weight = null should count as 1)
+            self.store_event(
+                data={
+                    "event_id": "b1" * 16,
+                    "message": "Error event without sampling for sample_eps",
+                    "type": "error",
+                    "exception": [{"type": "ValueError", "value": "Something else went wrong"}],
+                    "timestamp": self.ten_mins_ago_iso,
+                    "fingerprint": ["group2_no_sampling"],
+                },
+                project_id=self.project.id,
+            )
+
+            # Test with errors dataset - sample_eps() should return raw rate, not upsampled
+            query = {
+                "field": ["sample_eps()"],
+                "statsPeriod": "2h",
+                "query": "event.type:error",
+                "dataset": "errors",
+            }
+            response = self.do_request(query)
+            assert response.status_code == 200, response.content
+            # Expect sample_eps to return raw rate: 2 events / 7200 seconds = 2/7200
+            expected_sample_eps = 2 / 7200
+            actual_sample_eps = response.data["data"][0]["sample_eps()"]
+            assert (
+                abs(actual_sample_eps - expected_sample_eps) < 0.0001
+            )  # Allow small rounding differences
+
+            # Check meta information
+            meta = response.data["meta"]
+            assert "fields" in meta
+            assert "sample_eps()" in meta["fields"]
+            assert meta["fields"]["sample_eps()"] == "rate"
+
+    def test_sample_epm_with_allowlisted_project(self):
+        """Test that sample_epm() returns raw sample rate (not upsampled) for allowlisted projects."""
+        # Set up allowlisted project
+        with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}):
+            # Store error event with error_sampling context
+            self.store_event(
+                data={
+                    "event_id": "c" * 32,
+                    "message": "Error event for sample_epm",
+                    "type": "error",
+                    "exception": [{"type": "ValueError", "value": "Something went wrong"}],
+                    "timestamp": self.ten_mins_ago_iso,
+                    "fingerprint": ["group3"],
+                    "contexts": {"error_sampling": {"client_sample_rate": 0.1}},
+                },
+                project_id=self.project.id,
+            )
+
+            # Store error event without error_sampling context (sample_weight = null should count as 1)
+            self.store_event(
+                data={
+                    "event_id": "c1" * 16,
+                    "message": "Error event without sampling for sample_epm",
+                    "type": "error",
+                    "exception": [{"type": "ValueError", "value": "Something else went wrong"}],
+                    "timestamp": self.ten_mins_ago_iso,
+                    "fingerprint": ["group3_no_sampling"],
+                },
+                project_id=self.project.id,
+            )
+
+            # Test with errors dataset - sample_epm() should return raw rate, not upsampled
+            query = {
+                "field": ["sample_epm()"],
+                "statsPeriod": "2h",
+                "query": "event.type:error",
+                "dataset": "errors",
+            }
+            response = self.do_request(query)
+            assert response.status_code == 200, response.content
+            # Expect sample_epm to return raw rate: 2 events / 120 minutes = 2/120
+            expected_sample_epm = 2 / 120
+            actual_sample_epm = response.data["data"][0]["sample_epm()"]
+            assert (
+                abs(actual_sample_epm - expected_sample_epm) < 0.001
+            )  # Allow small rounding differences
+
+            # Check meta information
+            meta = response.data["meta"]
+            assert "fields" in meta
+            assert "sample_epm()" in meta["fields"]
+            assert meta["fields"]["sample_epm()"] == "rate"
+
     def test_is_status(self):
         self.store_event(
             data={