From f63e995250f4d597bb44023b30ccf330fac8ca77 Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Fri, 18 Jul 2025 15:40:48 -0700 Subject: [PATCH 1/4] feat(errors): Add error upsampling aggregation functions with feature flag Allow projects with error upsampling to use new sample_count(), sample_eps() and sample_epm() function columns in Discover, returning the non extrapolated versions of these functions. --- src/sentry/api/bases/organization_events.py | 38 ++--- src/sentry/api/helpers/error_upsampling.py | 8 +- .../api/endpoints/test_organization_events.py | 155 ++++++++++++++++++ 3 files changed, 179 insertions(+), 22 deletions(-) diff --git a/src/sentry/api/bases/organization_events.py b/src/sentry/api/bases/organization_events.py index d18a53e92fcc60..e86c8a2d8f6f66 100644 --- a/src/sentry/api/bases/organization_events.py +++ b/src/sentry/api/bases/organization_events.py @@ -435,26 +435,24 @@ def handle_error_upsampling(self, project_ids: Sequence[int], results: dict[str, data = results.get("data", []) fields_meta = results.get("meta", {}).get("fields", {}) - for result in data: - if "count" in result: - result["count()"] = result["count"] - del result["count"] - if "eps" in result: - result["eps()"] = result["eps"] - del result["eps"] - if "epm" in result: - result["epm()"] = result["epm"] - del result["epm"] - - if "count" in fields_meta: - fields_meta["count()"] = fields_meta["count"] - del fields_meta["count"] - if "eps" in fields_meta: - fields_meta["eps()"] = fields_meta["eps"] - del fields_meta["eps"] - if "epm" in fields_meta: - fields_meta["epm()"] = fields_meta["epm"] - del fields_meta["epm"] + upsampling_affected_functions = [ + "count", + "eps", + "epm", + "sample_count", + "sample_eps", + "sample_epm", + ] + for function in upsampling_affected_functions: + for result in data: + if function in result: + result[f"{function}()"] = result[function] + del result[function] + + for function in upsampling_affected_functions: + if function in fields_meta: + fields_meta[f"{function}()"] = fields_meta[function] + del fields_meta[function] def handle_issues( self, results: Sequence[Any], project_ids: Sequence[int], organization: Organization diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py index 990803a4f4d35a..a7c5eb273774ff 100644 --- a/src/sentry/api/helpers/error_upsampling.py +++ b/src/sentry/api/helpers/error_upsampling.py @@ -55,12 +55,16 @@ def transform_query_columns_for_error_upsampling(query_columns: Sequence[str]) - if column_lower == "count()": transformed_columns.append("upsampled_count() as count") - elif column_lower == "eps()": transformed_columns.append("upsampled_eps() as eps") - elif column_lower == "epm()": transformed_columns.append("upsampled_epm() as epm") + elif column_lower == "sample_count()": + transformed_columns.append("count() as sample_count") + elif column_lower == "sample_eps()": + transformed_columns.append("eps() as sample_eps") + elif column_lower == "sample_epm()": + transformed_columns.append("epm() as sample_epm") else: transformed_columns.append(column) diff --git a/tests/snuba/api/endpoints/test_organization_events.py b/tests/snuba/api/endpoints/test_organization_events.py index e0c9896310dfae..eef49d6b8e881a 100644 --- a/tests/snuba/api/endpoints/test_organization_events.py +++ b/tests/snuba/api/endpoints/test_organization_events.py @@ -6822,6 +6822,161 @@ def test_error_upsampling_with_partial_allowlist(self): # Expect upsampling since any project is allowlisted (both events upsampled: 
10 + 10 = 20) assert response.data["data"][0]["count()"] == 20 + def test_sample_count_with_allowlisted_project(self): + """Test that sample_count() returns raw sample count (not upsampled) for allowlisted projects.""" + # Set up allowlisted project + with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}): + # Store error event with error_sampling context + self.store_event( + data={ + "event_id": "a" * 32, + "message": "Error event for sample_count", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group1"], + "contexts": {"error_sampling": {"client_sample_rate": 0.1}}, + }, + project_id=self.project.id, + ) + + # Store error event without error_sampling context (sample_weight = null should count as 1) + self.store_event( + data={ + "event_id": "a1" * 16, + "message": "Error event without sampling", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something else went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group1_no_sampling"], + }, + project_id=self.project.id, + ) + + # Test with errors dataset - sample_count() should return raw count, not upsampled + query = { + "field": ["sample_count()"], + "statsPeriod": "2h", + "query": "event.type:error", + "dataset": "errors", + } + response = self.do_request(query) + assert response.status_code == 200, response.content + # Expect sample_count to return raw count: 2 events (not upsampled 11) + assert response.data["data"][0]["sample_count()"] == 2 + + # Check meta information + meta = response.data["meta"] + assert "fields" in meta + assert "sample_count()" in meta["fields"] + assert meta["fields"]["sample_count()"] == "integer" + + def test_sample_eps_with_allowlisted_project(self): + """Test that sample_eps() returns raw sample rate (not upsampled) for allowlisted projects.""" + # Set up allowlisted project + with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}): + # Store error event with error_sampling context + self.store_event( + data={ + "event_id": "b" * 32, + "message": "Error event for sample_eps", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group2"], + "contexts": {"error_sampling": {"client_sample_rate": 0.1}}, + }, + project_id=self.project.id, + ) + + # Store error event without error_sampling context (sample_weight = null should count as 1) + self.store_event( + data={ + "event_id": "b1" * 16, + "message": "Error event without sampling for sample_eps", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something else went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group2_no_sampling"], + }, + project_id=self.project.id, + ) + + # Test with errors dataset - sample_eps() should return raw rate, not upsampled + query = { + "field": ["sample_eps()"], + "statsPeriod": "2h", + "query": "event.type:error", + "dataset": "errors", + } + response = self.do_request(query) + assert response.status_code == 200, response.content + # Expect sample_eps to return raw rate: 2 events / 7200 seconds = 2/7200 + expected_sample_eps = 2 / 7200 + actual_sample_eps = response.data["data"][0]["sample_eps()"] + assert ( + abs(actual_sample_eps - expected_sample_eps) < 0.0001 + ) # Allow small rounding differences + + # Check meta information + meta = response.data["meta"] + assert "fields" in meta + 
assert "sample_eps()" in meta["fields"] + assert meta["fields"]["sample_eps()"] == "rate" + + def test_sample_epm_with_allowlisted_project(self): + """Test that sample_epm() returns raw sample rate (not upsampled) for allowlisted projects.""" + # Set up allowlisted project + with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}): + # Store error event with error_sampling context + self.store_event( + data={ + "event_id": "c" * 32, + "message": "Error event for sample_epm", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group3"], + "contexts": {"error_sampling": {"client_sample_rate": 0.1}}, + }, + project_id=self.project.id, + ) + + # Store error event without error_sampling context (sample_weight = null should count as 1) + self.store_event( + data={ + "event_id": "c1" * 16, + "message": "Error event without sampling for sample_epm", + "type": "error", + "exception": [{"type": "ValueError", "value": "Something else went wrong"}], + "timestamp": self.ten_mins_ago_iso, + "fingerprint": ["group3_no_sampling"], + }, + project_id=self.project.id, + ) + + # Test with errors dataset - sample_epm() should return raw rate, not upsampled + query = { + "field": ["sample_epm()"], + "statsPeriod": "2h", + "query": "event.type:error", + "dataset": "errors", + } + response = self.do_request(query) + assert response.status_code == 200, response.content + # Expect sample_epm to return raw rate: 2 events / 120 minutes = 2/120 + expected_sample_epm = 2 / 120 + actual_sample_epm = response.data["data"][0]["sample_epm()"] + assert ( + abs(actual_sample_epm - expected_sample_epm) < 0.001 + ) # Allow small rounding differences + + # Check meta information + meta = response.data["meta"] + assert "fields" in meta + assert "sample_epm()" in meta["fields"] + assert meta["fields"]["sample_epm()"] == "rate" + def test_is_status(self): self.store_event( data={ From 51823eef5a27e5399108385a621c781ef044f6e6 Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Sat, 19 Jul 2025 18:10:12 -0700 Subject: [PATCH 2/4] minor fix - unified for loops --- src/sentry/api/bases/organization_events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sentry/api/bases/organization_events.py b/src/sentry/api/bases/organization_events.py index e86c8a2d8f6f66..b07bb177acd854 100644 --- a/src/sentry/api/bases/organization_events.py +++ b/src/sentry/api/bases/organization_events.py @@ -449,7 +449,6 @@ def handle_error_upsampling(self, project_ids: Sequence[int], results: dict[str, result[f"{function}()"] = result[function] del result[function] - for function in upsampling_affected_functions: if function in fields_meta: fields_meta[f"{function}()"] = fields_meta[function] del fields_meta[function] From 2f4a0feca8b47fea92aee8c4596f430fc645dfd8 Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Mon, 21 Jul 2025 15:02:07 -0700 Subject: [PATCH 3/4] refactor handle_error_sampling fix issues with double conversion and moved logic into helper --- src/sentry/api/bases/organization_events.py | 29 +++-------- src/sentry/api/helpers/error_upsampling.py | 54 +++++++++++++++++++++ 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/src/sentry/api/bases/organization_events.py b/src/sentry/api/bases/organization_events.py index 38d3f104b09066..c4b09d1312690b 100644 --- a/src/sentry/api/bases/organization_events.py +++ b/src/sentry/api/bases/organization_events.py @@ -18,7 +18,10 @@ from sentry.api.base 
import CURSOR_LINK_HEADER from sentry.api.bases import NoProjects from sentry.api.bases.organization import FilterParamsDateNotNull, OrganizationEndpoint -from sentry.api.helpers.error_upsampling import are_any_projects_error_upsampled +from sentry.api.helpers.error_upsampling import ( + are_any_projects_error_upsampled, + convert_fields_for_upsampling, +) from sentry.api.helpers.mobile import get_readable_device_name from sentry.api.helpers.teams import get_teams from sentry.api.serializers.snuba import SnubaTSResultSerializer @@ -425,30 +428,14 @@ def handle_data( def handle_error_upsampling(self, project_ids: Sequence[int], results: dict[str, Any]): """ - If the query is for error upsampled projects, we need to rename the fields to include the () - and update the data and meta fields to reflect the new field names. This works around a limitation in - how aliases are handled in the SnQL parser. + If the query is for error upsampled projects, we convert various functions under the hood. + We need to rename these fields before returning the results to the client, to hide the conversion. + This is done here to work around a limitation in how aliases are handled in the SnQL parser. """ if are_any_projects_error_upsampled(project_ids): data = results.get("data", []) fields_meta = results.get("meta", {}).get("fields", {}) - - function_conversions = { - "upsampled_count()": "count()", - "upsampled_eps()": "eps()", - "upsampled_epm()": "epm()", - } - - # Go over each both data and meta, and convert function names to the non-upsampled version - for upsampled_function, count_function in function_conversions.items(): - for result in data: - if upsampled_function in result: - result[count_function] = result[upsampled_function] - del result[upsampled_function] - - if upsampled_function in fields_meta: - fields_meta[count_function] = fields_meta[upsampled_function] - del fields_meta[upsampled_function] + convert_fields_for_upsampling(data, fields_meta) def handle_issues( self, results: Sequence[Any], project_ids: Sequence[int], organization: Organization diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py index 169e147eab63e8..6dc2b5ac673a41 100644 --- a/src/sentry/api/helpers/error_upsampling.py +++ b/src/sentry/api/helpers/error_upsampling.py @@ -10,6 +10,57 @@ UPSAMPLED_ERROR_AGGREGATION = "upsampled_count" +# Function key conversions for error upsampling results +_FUNCTION_KEY_CONVERSIONS = { + "count()": "sample_count()", + "eps()": "sample_eps()", + "epm()": "sample_epm()", + "upsampled_count()": "count()", + "upsampled_eps()": "eps()", + "upsampled_epm()": "epm()", +} + +# Pre-computed ordered keys to handle conversion conflicts +# Keys that are targets of other conversions must be processed first +_conversion_targets = set(_FUNCTION_KEY_CONVERSIONS.values()) +_ORDERED_CONVERSION_KEYS = sorted( + _FUNCTION_KEY_CONVERSIONS.keys(), key=lambda k: k not in _conversion_targets +) + + +def convert_fields_for_upsampling(data: list[dict[str, Any]], fields_meta: dict[str, str]) -> None: + """ + Convert field names in query results for error upsampled projects. + This renames upsampled_* functions to their standard names and standard functions + to sample_* equivalents to hide the conversion from the client. 
+ + Args: + data: List of result dictionaries to modify in-place + fields_meta: Meta fields dictionary to modify in-place + """ + # Collect keys that need conversion and exist in data + all_present_keys = set() + for result in data: + all_present_keys.update(result.keys()) + + # Filter the pre-ordered list to only include keys actually present + keys_to_convert = [key for key in _ORDERED_CONVERSION_KEYS if key in all_present_keys] + + # Apply conversions to data + for result in data: + for original_key in keys_to_convert: + if original_key in result: + converted_key = _FUNCTION_KEY_CONVERSIONS[original_key] + result[converted_key] = result[original_key] + del result[original_key] + + # Apply conversions to fields_meta + for original_key in keys_to_convert: + if original_key in fields_meta: + converted_key = _FUNCTION_KEY_CONVERSIONS[original_key] + fields_meta[converted_key] = fields_meta[original_key] + del fields_meta[original_key] + def is_errors_query_for_error_upsampled_projects( snuba_params: SnubaParams, @@ -55,6 +106,9 @@ def transform_query_columns_for_error_upsampling( "count()": "upsampled_count()", "eps()": "upsampled_eps()", "epm()": "upsampled_epm()", + "sample_count()": "count()", + "sample_eps()": "eps()", + "sample_epm()": "epm()", } transformed_columns = [] From 54fddecc3a216bac33a9ba24e6e7ca2178cad3bb Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Mon, 21 Jul 2025 15:11:18 -0700 Subject: [PATCH 4/4] mypy fix --- src/sentry/api/helpers/error_upsampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/api/helpers/error_upsampling.py b/src/sentry/api/helpers/error_upsampling.py index 6dc2b5ac673a41..b131b407af260a 100644 --- a/src/sentry/api/helpers/error_upsampling.py +++ b/src/sentry/api/helpers/error_upsampling.py @@ -39,7 +39,7 @@ def convert_fields_for_upsampling(data: list[dict[str, Any]], fields_meta: dict[ fields_meta: Meta fields dictionary to modify in-place """ # Collect keys that need conversion and exist in data - all_present_keys = set() + all_present_keys: set[str] = set() for result in data: all_present_keys.update(result.keys())
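
Note (not part of the patches): a minimal, self-contained sketch of the column round-trip these patches implement. The names QUERY_REWRITES, RESULT_RENAMES, rewrite_columns and rename_result_keys are simplified stand-ins for transform_query_columns_for_error_upsampling and convert_fields_for_upsampling in src/sentry/api/helpers/error_upsampling.py; the real helpers operate on Snuba query columns and result/meta dicts, this is only an illustration of the mapping and ordering logic.

# Simplified illustration of the error-upsampling column round-trip.
# Assumption: plain dicts and lists stand in for Discover columns and Snuba result rows.

QUERY_REWRITES = {
    # requested column -> column actually sent to Snuba
    "count()": "upsampled_count() as count",
    "eps()": "upsampled_eps() as eps",
    "epm()": "upsampled_epm() as epm",
    "sample_count()": "count() as sample_count",
    "sample_eps()": "eps() as sample_eps",
    "sample_epm()": "epm() as sample_epm",
}

RESULT_RENAMES = {
    # key returned by Snuba -> key the client asked for
    "count()": "sample_count()",
    "eps()": "sample_eps()",
    "epm()": "sample_epm()",
    "upsampled_count()": "count()",
    "upsampled_eps()": "eps()",
    "upsampled_epm()": "epm()",
}


def rewrite_columns(columns):
    """Rewrite requested Discover columns before the Snuba query is built."""
    return [QUERY_REWRITES.get(col.lower(), col) for col in columns]


def rename_result_keys(row):
    """Rename result keys so the client only sees the names it asked for.

    Keys that are also targets of another rename (count(), eps(), epm()) are
    moved first, so a value is never converted twice -- the same ordering trick
    the patch uses for its pre-computed conversion key list.
    """
    ordered = sorted(RESULT_RENAMES, key=lambda k: k not in RESULT_RENAMES.values())
    for old in ordered:
        if old in row:
            row[RESULT_RENAMES[old]] = row.pop(old)
    return row


# A project on the upsampling allowlist asking for both flavours of count:
print(rewrite_columns(["count()", "sample_count()"]))
# -> ['upsampled_count() as count', 'count() as sample_count']
print(rename_result_keys({"count()": 2, "upsampled_count()": 20}))
# -> {'sample_count()': 2, 'count()': 20}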