Skip to content

Commit af5153b

Browse files
authored
feat(errors): Events API backend - Add error upsampling aggregation functions (#95940)
The Events API now supports the new sample_count(), sample_eps() and sample_epm() function columns in Discover for error-upsampling projects; these return the original, non-extrapolated values of count(), eps() and epm().
1 parent b46477f commit af5153b

File tree

3 files changed

+217
-21
lines changed

3 files changed

+217
-21
lines changed

src/sentry/api/bases/organization_events.py

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
from sentry.api.base import CURSOR_LINK_HEADER
1919
from sentry.api.bases import NoProjects
2020
from sentry.api.bases.organization import FilterParamsDateNotNull, OrganizationEndpoint
21-
from sentry.api.helpers.error_upsampling import are_any_projects_error_upsampled
21+
from sentry.api.helpers.error_upsampling import (
22+
are_any_projects_error_upsampled,
23+
convert_fields_for_upsampling,
24+
)
2225
from sentry.api.helpers.mobile import get_readable_device_name
2326
from sentry.api.helpers.teams import get_teams
2427
from sentry.api.serializers.snuba import SnubaTSResultSerializer
@@ -425,30 +428,14 @@ def handle_data(
425428

426429
def handle_error_upsampling(self, project_ids: Sequence[int], results: dict[str, Any]):
    """
    Rename function columns in ``results`` when any queried project is error upsampled.

    For error upsampled projects various functions are converted under the hood, so the
    field names must be renamed before the results are returned to the client in order
    to hide that conversion. The renaming happens here to work around a limitation in
    how aliases are handled in the SnQL parser.

    ``results`` is modified in place: both the rows under ``"data"`` and the
    ``"meta" -> "fields"`` mapping are passed to ``convert_fields_for_upsampling``.
    """
    # Nothing to rename unless at least one project is error upsampled.
    if not are_any_projects_error_upsampled(project_ids):
        return

    rows = results.get("data", [])
    meta_fields = results.get("meta", {}).get("fields", {})
    convert_fields_for_upsampling(rows, meta_fields)
452439

453440
def handle_issues(
454441
self, results: Sequence[Any], project_ids: Sequence[int], organization: Organization

src/sentry/api/helpers/error_upsampling.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,57 @@
1010

1111
UPSAMPLED_ERROR_AGGREGATION = "upsampled_count"
1212

13+
# Function key conversions for error upsampling results.
# Maps the key as it comes back from the query to the key the client should see.
_FUNCTION_KEY_CONVERSIONS = {
    "count()": "sample_count()",
    "eps()": "sample_eps()",
    "epm()": "sample_epm()",
    "upsampled_count()": "count()",
    "upsampled_eps()": "eps()",
    "upsampled_epm()": "epm()",
}

# Pre-computed ordered keys to handle conversion conflicts.
# Keys that are targets of other conversions must be processed first: "count()" has to
# be moved to "sample_count()" before "upsampled_count()" is renamed to "count()",
# otherwise the second rename would overwrite the first value.
_conversion_targets = set(_FUNCTION_KEY_CONVERSIONS.values())
_ORDERED_CONVERSION_KEYS = sorted(
    _FUNCTION_KEY_CONVERSIONS.keys(), key=lambda k: k not in _conversion_targets
)


def _rename_function_keys(mapping: dict[str, Any]) -> None:
    """Rename every convertible key of ``mapping`` in place, in collision-safe order."""
    for original_key in _ORDERED_CONVERSION_KEYS:
        if original_key in mapping:
            mapping[_FUNCTION_KEY_CONVERSIONS[original_key]] = mapping.pop(original_key)


def convert_fields_for_upsampling(data: list[dict[str, Any]], fields_meta: dict[str, str]) -> None:
    """
    Convert field names in query results for error upsampled projects.

    This renames upsampled_* functions to their standard names and standard functions
    to sample_* equivalents to hide the conversion from the client.

    Each row and the meta mapping are renamed based on their own keys, so
    ``fields_meta`` is converted even when ``data`` is empty or when no row
    contains the key in question.

    Args:
        data: List of result dictionaries to modify in-place
        fields_meta: Meta fields dictionary to modify in-place
    """
    for result in data:
        _rename_function_keys(result)

    # Meta is renamed independently of the rows; an empty result set must still
    # expose the client-facing field names.
    _rename_function_keys(fields_meta)
63+
1364

1465
def is_errors_query_for_error_upsampled_projects(
1566
snuba_params: SnubaParams,
@@ -55,6 +106,9 @@ def transform_query_columns_for_error_upsampling(
55106
"count()": "upsampled_count()",
56107
"eps()": "upsampled_eps()",
57108
"epm()": "upsampled_epm()",
109+
"sample_count()": "count()",
110+
"sample_eps()": "eps()",
111+
"sample_epm()": "epm()",
58112
}
59113

60114
transformed_columns = []

tests/snuba/api/endpoints/test_organization_events.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6822,6 +6822,161 @@ def test_error_upsampling_with_partial_allowlist(self):
68226822
# Expect upsampling since any project is allowlisted (both events upsampled: 10 + 10 = 20)
68236823
assert response.data["data"][0]["count()"] == 20
68246824

6825+
def test_sample_count_with_allowlisted_project(self):
    """sample_count() reports the raw (non-upsampled) event count for an allowlisted project."""
    allowlist_option = {"issues.client_error_sampling.project_allowlist": [self.project.id]}
    with self.options(allowlist_option):
        # First event: carries an error_sampling context with a 0.1 client sample rate.
        sampled_event = {
            "event_id": "a" * 32,
            "message": "Error event for sample_count",
            "type": "error",
            "exception": [{"type": "ValueError", "value": "Something went wrong"}],
            "timestamp": self.ten_mins_ago_iso,
            "fingerprint": ["group1"],
            "contexts": {"error_sampling": {"client_sample_rate": 0.1}},
        }
        self.store_event(data=sampled_event, project_id=self.project.id)

        # Second event: no error_sampling context (sample_weight = null should count as 1).
        unsampled_event = {
            "event_id": "a1" * 16,
            "message": "Error event without sampling",
            "type": "error",
            "exception": [{"type": "ValueError", "value": "Something else went wrong"}],
            "timestamp": self.ten_mins_ago_iso,
            "fingerprint": ["group1_no_sampling"],
        }
        self.store_event(data=unsampled_event, project_id=self.project.id)

        # Query the errors dataset for sample_count().
        response = self.do_request(
            {
                "field": ["sample_count()"],
                "statsPeriod": "2h",
                "query": "event.type:error",
                "dataset": "errors",
            }
        )
        assert response.status_code == 200, response.content

        # Raw count is expected: 2 stored events, not the upsampled 11.
        first_row = response.data["data"][0]
        assert first_row["sample_count()"] == 2

        # The meta section must expose the field under the requested name and type.
        meta = response.data["meta"]
        assert "fields" in meta
        assert "sample_count()" in meta["fields"]
        assert meta["fields"]["sample_count()"] == "integer"
6873+
6874+
def test_sample_eps_with_allowlisted_project(self):
    """sample_eps() reports the raw (non-upsampled) per-second rate for an allowlisted project."""
    allowlist_option = {"issues.client_error_sampling.project_allowlist": [self.project.id]}
    with self.options(allowlist_option):
        # First event: carries an error_sampling context with a 0.1 client sample rate.
        sampled_event = {
            "event_id": "b" * 32,
            "message": "Error event for sample_eps",
            "type": "error",
            "exception": [{"type": "ValueError", "value": "Something went wrong"}],
            "timestamp": self.ten_mins_ago_iso,
            "fingerprint": ["group2"],
            "contexts": {"error_sampling": {"client_sample_rate": 0.1}},
        }
        self.store_event(data=sampled_event, project_id=self.project.id)

        # Second event: no error_sampling context (sample_weight = null should count as 1).
        unsampled_event = {
            "event_id": "b1" * 16,
            "message": "Error event without sampling for sample_eps",
            "type": "error",
            "exception": [{"type": "ValueError", "value": "Something else went wrong"}],
            "timestamp": self.ten_mins_ago_iso,
            "fingerprint": ["group2_no_sampling"],
        }
        self.store_event(data=unsampled_event, project_id=self.project.id)

        # Query the errors dataset for sample_eps().
        response = self.do_request(
            {
                "field": ["sample_eps()"],
                "statsPeriod": "2h",
                "query": "event.type:error",
                "dataset": "errors",
            }
        )
        assert response.status_code == 200, response.content

        # Raw rate is expected: 2 events over the 2h (7200 second) stats period.
        raw_rate = 2 / 7200
        observed_rate = response.data["data"][0]["sample_eps()"]
        # Allow small rounding differences.
        assert abs(observed_rate - raw_rate) < 0.0001

        # The meta section must expose the field under the requested name and type.
        meta = response.data["meta"]
        assert "fields" in meta
        assert "sample_eps()" in meta["fields"]
        assert meta["fields"]["sample_eps()"] == "rate"
6926+
6927+
def test_sample_epm_with_allowlisted_project(self):
    """sample_epm() reports the raw (non-upsampled) per-minute rate for an allowlisted project."""
    allowlist_option = {"issues.client_error_sampling.project_allowlist": [self.project.id]}
    with self.options(allowlist_option):
        # First event: carries an error_sampling context with a 0.1 client sample rate.
        sampled_event = {
            "event_id": "c" * 32,
            "message": "Error event for sample_epm",
            "type": "error",
            "exception": [{"type": "ValueError", "value": "Something went wrong"}],
            "timestamp": self.ten_mins_ago_iso,
            "fingerprint": ["group3"],
            "contexts": {"error_sampling": {"client_sample_rate": 0.1}},
        }
        self.store_event(data=sampled_event, project_id=self.project.id)

        # Second event: no error_sampling context (sample_weight = null should count as 1).
        unsampled_event = {
            "event_id": "c1" * 16,
            "message": "Error event without sampling for sample_epm",
            "type": "error",
            "exception": [{"type": "ValueError", "value": "Something else went wrong"}],
            "timestamp": self.ten_mins_ago_iso,
            "fingerprint": ["group3_no_sampling"],
        }
        self.store_event(data=unsampled_event, project_id=self.project.id)

        # Query the errors dataset for sample_epm().
        response = self.do_request(
            {
                "field": ["sample_epm()"],
                "statsPeriod": "2h",
                "query": "event.type:error",
                "dataset": "errors",
            }
        )
        assert response.status_code == 200, response.content

        # Raw rate is expected: 2 events over the 2h (120 minute) stats period.
        raw_rate = 2 / 120
        observed_rate = response.data["data"][0]["sample_epm()"]
        # Allow small rounding differences.
        assert abs(observed_rate - raw_rate) < 0.001

        # The meta section must expose the field under the requested name and type.
        meta = response.data["meta"]
        assert "fields" in meta
        assert "sample_epm()" in meta["fields"]
        assert meta["fields"]["sample_epm()"] == "rate"
6979+
68256980
def test_is_status(self):
68266981
self.store_event(
68276982
data={

0 commit comments

Comments
 (0)