From f63e995250f4d597bb44023b30ccf330fac8ca77 Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Fri, 18 Jul 2025 15:40:48 -0700 Subject: [PATCH 1/4] feat(errors): Add error upsampling aggregation functions with feature flag Allow projects with error upsampling to use new sample_count(), sample_eps() and sample_epm() function columns in Discover, returning the non extrapolated versions of these functions. --- src/sentry/api/bases/organization_events.py | 38 ++--- src/sentry/api/helpers/error_upsampling.py | 8 +- .../api/endpoints/test_organization_events.py | 155 ++++++++++++++++++ 3 files changed, 179 insertions(+), 22 deletions(-) diff --git a/src/sentry/api/bases/organization_events.py b/src/sentry/api/bases/organization_events.py index d18a53e92fcc60..e86c8a2d8f6f66 100644 --- a/src/sentry/api/bases/organization_events.py +++ b/src/sentry/api/bases/organization_events.py @@ -435,26 +435,24 @@ def handle_error_upsampling(self, project_ids: Sequence[int], results: dict[str, data = results.get("data", []) fields_meta = results.get("meta", {}).get("fields", {}) - for result in data: - if "count" in result: - result["count()"] = result["count"] - del result["count"] - if "eps" in result: - result["eps()"] = result["eps"] - del result["eps"] - if "epm" in result: - result["epm()"] = result["epm"] - del result["epm"] - - if "count" in fields_meta: - fields_meta["count()"] = fields_meta["count"] - del fields_meta["count"] - if "eps" in fields_meta: - fields_meta["eps()"] = fields_meta["eps"] - del fields_meta["eps"] - if "epm" in fields_meta: - fields_meta["epm()"] = fields_meta["epm"] - del fields_meta["epm"] + upsampling_affected_functions = [ + "count", + "eps", + "epm", + "sample_count", + "sample_eps", + "sample_epm", + ] + for function in upsampling_affected_functions: + for result in data: + if function in result: + result[f"{function}()"] = result[function] + del result[function] + + for function in upsampling_affected_functions: + if function in fields_meta: + fields_meta[f"{function}()"] = fields_meta[function] + del fields_meta[function] def handle_issues( self, results: Sequence[Any], project_ids: Sequence[int], organization: Organization diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py index 990803a4f4d35a..a7c5eb273774ff 100644 --- a/src/sentry/api/helpers/error_upsampling.py +++ b/src/sentry/api/helpers/error_upsampling.py @@ -55,12 +55,16 @@ def transform_query_columns_for_error_upsampling(query_columns: Sequence[str]) - if column_lower == "count()": transformed_columns.append("upsampled_count() as count") - elif column_lower == "eps()": transformed_columns.append("upsampled_eps() as eps") - elif column_lower == "epm()": transformed_columns.append("upsampled_epm() as epm") + elif column_lower == "sample_count()": + transformed_columns.append("count() as sample_count") + elif column_lower == "sample_eps()": + transformed_columns.append("eps() as sample_eps") + elif column_lower == "sample_epm()": + transformed_columns.append("epm() as sample_epm") else: transformed_columns.append(column) diff --git a/tests/snuba/api/endpoints/test_organization_events.py b/tests/snuba/api/endpoints/test_organization_events.py index e0c9896310dfae..eef49d6b8e881a 100644 --- a/tests/snuba/api/endpoints/test_organization_events.py +++ b/tests/snuba/api/endpoints/test_organization_events.py @@ -6822,6 +6822,161 @@ def test_error_upsampling_with_partial_allowlist(self): # Expect upsampling since any project is allowlisted (both events upsampled: 
10 + 10 = 20) assert response.data["data"][0]["count()"] == 20 + def test_sample_count_with_allowlisted_project(self): + """Test that sample_count() returns raw sample count (not upsampled) for allowlisted projects.""" + # Set up allowlisted project + with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}): + # Store error event with error_sampling context + self.store_event( + data={ + "event_id": "a" * 32, + "message": "Error event for sample_count", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group1"], + "contexts": {"error_sampling": {"client_sample_rate": 0.1}}, + }, + project_id=self.project.id, + ) + + # Store error event without error_sampling context (sample_weight = null should count as 1) + self.store_event( + data={ + "event_id": "a1" * 16, + "message": "Error event without sampling", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something else went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group1_no_sampling"], + }, + project_id=self.project.id, + ) + + # Test with errors dataset - sample_count() should return raw count, not upsampled + query = { + "field": ["sample_count()"], + "statsPeriod": "2h", + "query": "event.type:error", + "dataset": "errors", + } + response = self.do_request(query) + assert response.status_code == 200, response.content + # Expect sample_count to return raw count: 2 events (not upsampled 11) + assert response.data["data"][0]["sample_count()"] == 2 + + # Check meta information + meta = response.data["meta"] + assert "fields" in meta + assert "sample_count()" in meta["fields"] + assert meta["fields"]["sample_count()"] == "integer" + + def test_sample_eps_with_allowlisted_project(self): + """Test that sample_eps() returns raw sample rate (not upsampled) for allowlisted projects.""" + # Set up allowlisted project + with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}): + # Store error event with error_sampling context + self.store_event( + data={ + "event_id": "b" * 32, + "message": "Error event for sample_eps", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group2"], + "contexts": {"error_sampling": {"client_sample_rate": 0.1}}, + }, + project_id=self.project.id, + ) + + # Store error event without error_sampling context (sample_weight = null should count as 1) + self.store_event( + data={ + "event_id": "b1" * 16, + "message": "Error event without sampling for sample_eps", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something else went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group2_no_sampling"], + }, + project_id=self.project.id, + ) + + # Test with errors dataset - sample_eps() should return raw rate, not upsampled + query = { + "field": ["sample_eps()"], + "statsPeriod": "2h", + "query": "event.type:error", + "dataset": "errors", + } + response = self.do_request(query) + assert response.status_code == 200, response.content + # Expect sample_eps to return raw rate: 2 events / 7200 seconds = 2/7200 + expected_sample_eps = 2 / 7200 + actual_sample_eps = response.data["data"][0]["sample_eps()"] + assert ( + abs(actual_sample_eps - expected_sample_eps) < 0.0001 + ) # Allow small rounding differences + + # Check meta information + meta = response.data["meta"] + assert "fields" in meta + 
assert "sample_eps()" in meta["fields"] + assert meta["fields"]["sample_eps()"] == "rate" + + def test_sample_epm_with_allowlisted_project(self): + """Test that sample_epm() returns raw sample rate (not upsampled) for allowlisted projects.""" + # Set up allowlisted project + with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}): + # Store error event with error_sampling context + self.store_event( + data={ + "event_id": "c" * 32, + "message": "Error event for sample_epm", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group3"], + "contexts": {"error_sampling": {"client_sample_rate": 0.1}}, + }, + project_id=self.project.id, + ) + + # Store error event without error_sampling context (sample_weight = null should count as 1) + self.store_event( + data={ + "event_id": "c1" * 16, + "message": "Error event without sampling for sample_epm", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something else went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group3_no_sampling"], + }, + project_id=self.project.id, + ) + + # Test with errors dataset - sample_epm() should return raw rate, not upsampled + query = { + "field": ["sample_epm()"], + "statsPeriod": "2h", + "query": "event.type:error", + "dataset": "errors", + } + response = self.do_request(query) + assert response.status_code == 200, response.content + # Expect sample_epm to return raw rate: 2 events / 120 minutes = 2/120 + expected_sample_epm = 2 / 120 + actual_sample_epm = response.data["data"][0]["sample_epm()"] + assert ( + abs(actual_sample_epm - expected_sample_epm) < 0.001 + ) # Allow small rounding differences + + # Check meta information + meta = response.data["meta"] + assert "fields" in meta + assert "sample_epm()" in meta["fields"] + assert meta["fields"]["sample_epm()"] == "rate" + def test_is_status(self): self.store_event( data={ From 51823eef5a27e5399108385a621c781ef044f6e6 Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Sat, 19 Jul 2025 18:10:12 -0700 Subject: [PATCH 2/4] minor fix - unified for loops --- src/sentry/api/bases/organization_events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sentry/api/bases/organization_events.py b/src/sentry/api/bases/organization_events.py index e86c8a2d8f6f66..b07bb177acd854 100644 --- a/src/sentry/api/bases/organization_events.py +++ b/src/sentry/api/bases/organization_events.py @@ -449,7 +449,6 @@ def handle_error_upsampling(self, project_ids: Sequence[int], results: dict[str, result[f"{function}()"] = result[function] del result[function] - for function in upsampling_affected_functions: if function in fields_meta: fields_meta[f"{function}()"] = fields_meta[function] del fields_meta[function] From 2f4a0feca8b47fea92aee8c4596f430fc645dfd8 Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Mon, 21 Jul 2025 15:02:07 -0700 Subject: [PATCH 3/4] refactor handle_error_sampling fix issues with double conversion and moved logic into helper --- src/sentry/api/bases/organization_events.py | 29 +++-------- src/sentry/api/helpers/error_upsampling.py | 54 +++++++++++++++++++++ 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/src/sentry/api/bases/organization_events.py b/src/sentry/api/bases/organization_events.py index 38d3f104b09066..c4b09d1312690b 100644 --- a/src/sentry/api/bases/organization_events.py +++ b/src/sentry/api/bases/organization_events.py @@ -18,7 +18,10 @@ from sentry.api.base 
import CURSOR_LINK_HEADER from sentry.api.bases import NoProjects from sentry.api.bases.organization import FilterParamsDateNotNull, OrganizationEndpoint -from sentry.api.helpers.error_upsampling import are_any_projects_error_upsampled +from sentry.api.helpers.error_upsampling import ( + are_any_projects_error_upsampled, + convert_fields_for_upsampling, +) from sentry.api.helpers.mobile import get_readable_device_name from sentry.api.helpers.teams import get_teams from sentry.api.serializers.snuba import SnubaTSResultSerializer @@ -425,30 +428,14 @@ def handle_data( def handle_error_upsampling(self, project_ids: Sequence[int], results: dict[str, Any]): """ - If the query is for error upsampled projects, we need to rename the fields to include the () - and update the data and meta fields to reflect the new field names. This works around a limitation in - how aliases are handled in the SnQL parser. + If the query is for error upsampled projects, we convert various functions under the hood. + We need to rename these fields before returning the results to the client, to hide the conversion. + This is done here to work around a limitation in how aliases are handled in the SnQL parser. """ if are_any_projects_error_upsampled(project_ids): data = results.get("data", []) fields_meta = results.get("meta", {}).get("fields", {}) - - function_conversions = { - "upsampled_count()": "count()", - "upsampled_eps()": "eps()", - "upsampled_epm()": "epm()", - } - - # Go over each both data and meta, and convert function names to the non-upsampled version - for upsampled_function, count_function in function_conversions.items(): - for result in data: - if upsampled_function in result: - result[count_function] = result[upsampled_function] - del result[upsampled_function] - - if upsampled_function in fields_meta: - fields_meta[count_function] = fields_meta[upsampled_function] - del fields_meta[upsampled_function] + convert_fields_for_upsampling(data, fields_meta) def handle_issues( self, results: Sequence[Any], project_ids: Sequence[int], organization: Organization diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py index 169e147eab63e8..6dc2b5ac673a41 100644 --- a/src/sentry/api/helpers/error_upsampling.py +++ b/src/sentry/api/helpers/error_upsampling.py @@ -10,6 +10,57 @@ UPSAMPLED_ERROR_AGGREGATION = "upsampled_count" +# Function key conversions for error upsampling results +_FUNCTION_KEY_CONVERSIONS = { + "count()": "sample_count()", + "eps()": "sample_eps()", + "epm()": "sample_epm()", + "upsampled_count()": "count()", + "upsampled_eps()": "eps()", + "upsampled_epm()": "epm()", +} + +# Pre-computed ordered keys to handle conversion conflicts +# Keys that are targets of other conversions must be processed first +_conversion_targets = set(_FUNCTION_KEY_CONVERSIONS.values()) +_ORDERED_CONVERSION_KEYS = sorted( + _FUNCTION_KEY_CONVERSIONS.keys(), key=lambda k: k not in _conversion_targets +) + + +def convert_fields_for_upsampling(data: list[dict[str, Any]], fields_meta: dict[str, str]) -> None: + """ + Convert field names in query results for error upsampled projects. + This renames upsampled_* functions to their standard names and standard functions + to sample_* equivalents to hide the conversion from the client. 
+ + Args: + data: List of result dictionaries to modify in-place + fields_meta: Meta fields dictionary to modify in-place + """ + # Collect keys that need conversion and exist in data + all_present_keys = set() + for result in data: + all_present_keys.update(result.keys()) + + # Filter the pre-ordered list to only include keys actually present + keys_to_convert = [key for key in _ORDERED_CONVERSION_KEYS if key in all_present_keys] + + # Apply conversions to data + for result in data: + for original_key in keys_to_convert: + if original_key in result: + converted_key = _FUNCTION_KEY_CONVERSIONS[original_key] + result[converted_key] = result[original_key] + del result[original_key] + + # Apply conversions to fields_meta + for original_key in keys_to_convert: + if original_key in fields_meta: + converted_key = _FUNCTION_KEY_CONVERSIONS[original_key] + fields_meta[converted_key] = fields_meta[original_key] + del fields_meta[original_key] + def is_errors_query_for_error_upsampled_projects( snuba_params: SnubaParams, @@ -55,6 +106,9 @@ def transform_query_columns_for_error_upsampling( "count()": "upsampled_count()", "eps()": "upsampled_eps()", "epm()": "upsampled_epm()", + "sample_count()": "count()", + "sample_eps()": "eps()", + "sample_epm()": "epm()", } transformed_columns = [] From 54fddecc3a216bac33a9ba24e6e7ca2178cad3bb Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Mon, 21 Jul 2025 15:11:18 -0700 Subject: [PATCH 4/4] mypy fix --- src/sentry/api/helpers/error_upsampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py index 6dc2b5ac673a41..b131b407af260a 100644 --- a/src/sentry/api/helpers/error_upsampling.py +++ b/src/sentry/api/helpers/error_upsampling.py @@ -39,7 +39,7 @@ def convert_fields_for_upsampling(data: list[dict[str, Any]], fields_meta: dict[ fields_meta: Meta fields dictionary to modify in-place """ # Collect keys that need conversion and exist in data - all_present_keys = set() + all_present_keys: set[str] = set() for result in data: all_present_keys.update(result.keys())
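
Note (not part of the patches): a minimal, self-contained sketch of the column round-trip these patches implement. The names QUERY_REWRITES, RESULT_RENAMES, rewrite_columns and rename_result_keys are simplified stand-ins for transform_query_columns_for_error_upsampling and convert_fields_for_upsampling in src/sentry/api/helpers/error_upsampling.py; the real helpers operate on Snuba query columns and result/meta dicts, this is only an illustration of the mapping and ordering logic.

# Simplified illustration of the error-upsampling column round-trip.
# Assumption: plain dicts and lists stand in for Discover columns and Snuba result rows.

QUERY_REWRITES = {
    # requested column -> column actually sent to Snuba
    "count()": "upsampled_count() as count",
    "eps()": "upsampled_eps() as eps",
    "epm()": "upsampled_epm() as epm",
    "sample_count()": "count() as sample_count",
    "sample_eps()": "eps() as sample_eps",
    "sample_epm()": "epm() as sample_epm",
}

RESULT_RENAMES = {
    # key returned by Snuba -> key the client asked for
    "count()": "sample_count()",
    "eps()": "sample_eps()",
    "epm()": "sample_epm()",
    "upsampled_count()": "count()",
    "upsampled_eps()": "eps()",
    "upsampled_epm()": "epm()",
}


def rewrite_columns(columns):
    """Rewrite requested Discover columns before the Snuba query is built."""
    return [QUERY_REWRITES.get(col.lower(), col) for col in columns]


def rename_result_keys(row):
    """Rename result keys so the client only sees the names it asked for.

    Keys that are also targets of another rename (count(), eps(), epm()) are
    moved first, so a value is never converted twice -- the same ordering trick
    the patch uses for its pre-computed conversion key list.
    """
    ordered = sorted(RESULT_RENAMES, key=lambda k: k not in RESULT_RENAMES.values())
    for old in ordered:
        if old in row:
            row[RESULT_RENAMES[old]] = row.pop(old)
    return row


# A project on the upsampling allowlist asking for both flavours of count:
print(rewrite_columns(["count()", "sample_count()"]))
# -> ['upsampled_count() as count', 'count() as sample_count']
print(rename_result_keys({"count()": 2, "upsampled_count()": 20}))
# -> {'sample_count()': 2, 'count()': 20}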