From 6c9472d3c393d06c04cf1a2f3c6adb16a84f5250 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Fri, 18 Jul 2025 14:29:08 -0700 Subject: [PATCH 1/8] Create Seer RPC methods to fetch data needed for index --- src/sentry/seer/endpoints/seer_rpc.py | 10 + src/sentry/seer/explorer/index_data.py | 471 +++++++++++++++ src/sentry/seer/explorer/models.py | 68 +++ src/sentry/seer/explorer/utils.py | 66 +++ tests/sentry/seer/explorer/test_index_data.py | 371 ++++++++++++ tests/sentry/seer/explorer/test_utils.py | 559 ++++++++++++++++++ 6 files changed, 1545 insertions(+) create mode 100644 src/sentry/seer/explorer/index_data.py create mode 100644 src/sentry/seer/explorer/models.py create mode 100644 src/sentry/seer/explorer/utils.py create mode 100644 tests/sentry/seer/explorer/test_index_data.py create mode 100644 tests/sentry/seer/explorer/test_utils.py diff --git a/src/sentry/seer/endpoints/seer_rpc.py b/src/sentry/seer/endpoints/seer_rpc.py index 08b97f6e199c3a..6018bd057645f2 100644 --- a/src/sentry/seer/endpoints/seer_rpc.py +++ b/src/sentry/seer/endpoints/seer_rpc.py @@ -55,6 +55,12 @@ from sentry.search.eap.utils import can_expose_attribute from sentry.search.events.types import SnubaParams from sentry.seer.autofix.autofix_tools import get_error_event_details, get_profile_details +from sentry.seer.explorer.index_data import ( + get_issues_for_transaction, + get_profiles_for_trace, + get_trace_for_transaction, + get_transactions_for_project, +) from sentry.seer.fetch_issues.fetch_issues import ( get_issues_related_to_file_patches, get_issues_related_to_function_names, @@ -557,6 +563,10 @@ def get_github_enterprise_integration_config( "get_attribute_names": get_attribute_names, "get_attribute_values_with_substring": get_attribute_values_with_substring, "get_attributes_and_values": get_attributes_and_values, + "get_transactions_for_project": get_transactions_for_project, + "get_trace_for_transaction": get_trace_for_transaction, + "get_profiles_for_trace": get_profiles_for_trace, + "get_issues_for_transaction": get_issues_for_transaction, "get_github_enterprise_integration_config": get_github_enterprise_integration_config, } diff --git a/src/sentry/seer/explorer/index_data.py b/src/sentry/seer/explorer/index_data.py new file mode 100644 index 00000000000000..d580291f0480b5 --- /dev/null +++ b/src/sentry/seer/explorer/index_data.py @@ -0,0 +1,471 @@ +import logging +from datetime import UTC, datetime, timedelta +from typing import Any + +import orjson + +from sentry import search +from sentry.api.event_search import parse_search_query +from sentry.api.serializers.base import serialize +from sentry.api.serializers.models.event import EventSerializer +from sentry.eventstore import backend as eventstore +from sentry.eventstore.models import GroupEvent +from sentry.models.project import Project +from sentry.profiles.utils import get_from_profiling_service +from sentry.search.eap.types import SearchResolverConfig +from sentry.search.events.types import SnubaParams +from sentry.seer.explorer.models import ( + IssueData, + ProfileData, + Span, + TraceData, + TraceProfiles, + Transaction, + TransactionIssues, +) +from sentry.seer.explorer.utils import convert_profile_to_execution_tree, normalize_description +from sentry.snuba import spans_rpc +from sentry.snuba.referrer import Referrer + +logger = logging.getLogger(__name__) + + +def get_transactions_for_project(project_id: int) -> list[Transaction]: + """ + Get a list of transactions for a project using EAP, sorted by volume/traffic. 
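+
+    A minimal usage sketch (illustrative only; the project ID is a placeholder
+    and results depend on the project having span data in the last 24 hours):
+
+        transactions = get_transactions_for_project(project_id=1)
+        names = [t.name for t in transactions]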
+ + Args: + project_id: The ID of the project to fetch transactions for + + Returns: + List of transactions with name and project id + """ + try: + project = Project.objects.get(id=project_id) + except Project.DoesNotExist: + logger.exception( + "Project does not exist; cannot fetch transactions", extra={"project_id": project_id} + ) + return [] + + end_time = datetime.now(UTC) + start_time = end_time - timedelta(hours=24) + + snuba_params = SnubaParams( + start=start_time, + end=end_time, + projects=[project], + organization=project.organization, + ) + config = SearchResolverConfig( + auto_fields=True, + ) + + # Query EAP for transactions with volume metrics + result = spans_rpc.run_table_query( + params=snuba_params, + query_string=f"is_transaction:true project.id:{project_id}", + selected_columns=[ + "transaction", + "count()", + ], + orderby=["-count()"], # Sort by count descending (highest volume first) + offset=0, + limit=500, + referrer=Referrer.SEER_RPC, + config=config, + sampling_mode="NORMAL", + ) + + # Extract transaction data from the result + transactions = [] + seen_names = set() + for row in result.get("data", []): + name = row.get("transaction") + normalized_name = normalize_description(name) + if normalized_name in seen_names: + continue + seen_names.add(normalized_name) + transactions.append( + Transaction( + name=normalized_name, + project_id=project_id, + ) + ) + + return transactions + + +def get_trace_for_transaction(transaction_name: str, project_id: int) -> TraceData | None: + """ + Get a sample trace for a given transaction, choosing the one with median span count. + + Args: + transaction_name: The name of the transaction to find traces for + project_id: The ID of the project + + Returns: + TraceData with all spans and relationships, or None if no traces found + """ + try: + project = Project.objects.get(id=project_id) + except Project.DoesNotExist: + logger.exception( + "Project does not exist; cannot fetch traces", + extra={"project_id": project_id, "transaction_name": transaction_name}, + ) + return None + + end_time = datetime.now(UTC) + start_time = end_time - timedelta(hours=24) + + snuba_params = SnubaParams( + start=start_time, + end=end_time, + projects=[project], + organization=project.organization, + ) + config = SearchResolverConfig( + auto_fields=True, + ) + + # Step 1: Get trace IDs with their span counts in a single query + traces_result = spans_rpc.run_table_query( + params=snuba_params, + query_string=f"transaction:{transaction_name} project.id:{project_id}", + selected_columns=[ + "trace", + "count()", # This counts all spans in each trace + ], + orderby=["-count()"], + offset=0, + limit=20, # Get more candidates to choose from + referrer=Referrer.SEER_RPC, + config=config, + sampling_mode="NORMAL", + ) + + trace_span_counts = [] + for row in traces_result.get("data", []): + trace_id = row.get("trace") + span_count = row.get("count()", 0) + if trace_id and span_count > 0: + trace_span_counts.append((trace_id, span_count)) + + if not trace_span_counts: + logger.info( + "No traces found for transaction", + extra={"transaction_name": transaction_name, "project_id": project_id}, + ) + return None + + # Choose trace with median span count + trace_span_counts.sort(key=lambda x: x[1]) # Sort by span count + median_index = len(trace_span_counts) // 2 + chosen_trace_id, total_spans = trace_span_counts[median_index] + + # Step 2: Get all spans in the chosen trace + spans_result = spans_rpc.run_table_query( + params=snuba_params, + 
query_string=f"trace:{chosen_trace_id}",
+        selected_columns=[
+            "span_id",
+            "parent_span",
+            "span.op",
+            "span.description",
+            "precise.start_ts",
+        ],
+        orderby=["precise.start_ts"],
+        offset=0,
+        limit=1000,
+        referrer=Referrer.SEER_RPC,
+        config=config,
+        sampling_mode="NORMAL",
+    )
+
+    # Step 3: Build span objects
+    spans = []
+    for row in spans_result.get("data", []):
+        span_id = row.get("span_id")
+        parent_span_id = row.get("parent_span")
+        span_op = row.get("span.op")
+        span_description = row.get("span.description")
+
+        if span_id:
+            spans.append(
+                Span(
+                    span_id=span_id,
+                    parent_span_id=parent_span_id,
+                    span_op=span_op,
+                    span_description=normalize_description(span_description or ""),
+                )
+            )
+
+    return TraceData(
+        trace_id=chosen_trace_id,
+        project_id=project_id,
+        transaction_name=transaction_name,
+        total_spans=len(spans),
+        spans=spans,
+    )
+
+
+def _fetch_profile_data(
+    profile_id: str, organization_id: int, project_id: int
+) -> dict[str, Any] | None:
+    """
+    Fetch raw profile data from the profiling service.
+
+    Args:
+        profile_id: The profile ID to fetch
+        organization_id: Organization ID
+        project_id: Project ID
+
+    Returns:
+        Raw profile data or None if not found
+    """
+    response = get_from_profiling_service(
+        "GET",
+        f"/organizations/{organization_id}/projects/{project_id}/profiles/{profile_id}",
+        params={"format": "sample"},
+    )
+
+    if response.status == 200:
+        return orjson.loads(response.data)
+    return None
+
+
+def get_profiles_for_trace(trace_id: str, project_id: int) -> TraceProfiles | None:
+    """
+    Get profiles for a given trace, with one profile per unique span/transaction.
+
+    Args:
+        trace_id: The trace ID to find profiles for
+        project_id: The ID of the project
+
+    Returns:
+        TraceProfiles with processed execution trees, or None if no profiles found
+    """
+    try:
+        project = Project.objects.get(id=project_id)
+    except Project.DoesNotExist:
+        logger.exception(
+            "Project does not exist; cannot fetch profiles",
+            extra={"project_id": project_id, "trace_id": trace_id},
+        )
+        return None
+
+    end_time = datetime.now(UTC)
+    start_time = end_time - timedelta(hours=24)
+
+    snuba_params = SnubaParams(
+        start=start_time,
+        end=end_time,
+        projects=[project],
+        organization=project.organization,
+    )
+    config = SearchResolverConfig(
+        auto_fields=True,
+    )
+
+    # Step 1: Find spans in the trace that have profile data
+    profiles_result = spans_rpc.run_table_query(
+        params=snuba_params,
+        query_string=f"trace:{trace_id} has:profile.id project.id:{project_id}",
+        selected_columns=[
+            "span_id",
+            "profile.id",
+            "transaction",
+            "span.op",
+            "is_transaction",
+            "precise.start_ts",
+        ],
+        orderby=["precise.start_ts"],
+        offset=0,
+        limit=50,
+        referrer=Referrer.SEER_RPC,
+        config=config,
+        sampling_mode="NORMAL",
+    )
+
+    # Step 2: Deduplicate by span_id (one profile per span/transaction)
+    seen_spans = set()
+    unique_profiles = []
+
+    for row in profiles_result.get("data", []):
+        span_id = row.get("span_id")
+        profile_id = row.get("profile.id")
+        transaction_name = row.get("transaction")
+
+        if not span_id or not profile_id or span_id in seen_spans:
+            continue
+
+        seen_spans.add(span_id)
+        unique_profiles.append(
+            {
+                "span_id": span_id,
+                "profile_id": profile_id,
+                "transaction_name": transaction_name,
+            }
+        )
+
+    if not unique_profiles:
+        logger.info(
+            "No profiles found for trace",
+            extra={"trace_id": trace_id, "project_id": project_id},
+        )
+        return None
+
+    # Step 3: Fetch and process each profile
+    processed_profiles = []
+
+    for profile_info in unique_profiles:
+        profile_id = profile_info["profile_id"]
+        span_id = profile_info["span_id"]
+        transaction_name = profile_info["transaction_name"]
+
+        # Fetch raw profile data
+        raw_profile = _fetch_profile_data(
+            profile_id=profile_id,
+            organization_id=project.organization_id,
+            project_id=project_id,
+        )
+
+        if not raw_profile:
+            logger.warning(
+                "Failed to fetch profile data",
+                extra={
+                    "profile_id": profile_id,
+                    "trace_id": trace_id,
+                    "project_id": project_id,
+                },
+            )
+            continue
+
+        # Convert to execution tree
+        execution_tree = convert_profile_to_execution_tree(raw_profile)
+
+        if execution_tree:
+            processed_profiles.append(
+                ProfileData(
+                    profile_id=profile_id,
+                    span_id=span_id,
+                    transaction_name=transaction_name,
+                    execution_tree=execution_tree,
+                    project_id=project_id,
+                )
+            )
+
+    if not processed_profiles:
+        logger.info(
+            "No processable profiles found for trace",
+            extra={"trace_id": trace_id, "project_id": project_id},
+        )
+        return None
+
+    return TraceProfiles(
+        trace_id=trace_id,
+        project_id=project_id,
+        profiles=processed_profiles,
+    )
+
+
+def get_issues_for_transaction(transaction_name: str, project_id: int) -> TransactionIssues | None:
+    """
+    Get the top 3 issues for a transaction in the last 24 hours, sorted by event count.
+
+    Args:
+        transaction_name: The name of the transaction to find issues for
+        project_id: The ID of the project
+
+    Returns:
+        TransactionIssues with issue data and recommended events, or None if no issues found
+    """
+    try:
+        project = Project.objects.get(id=project_id)
+    except Project.DoesNotExist:
+        logger.exception(
+            "Project does not exist; cannot fetch issues",
+            extra={"project_id": project_id, "transaction_name": transaction_name},
+        )
+        return None
+
+    end_time = datetime.now(UTC)
+    start_time = end_time - timedelta(hours=24)
+
+    # Step 1: Search for issues using transaction filter
+    try:
+        parsed_terms = parse_search_query(f'transaction:"{transaction_name}"')
+    except Exception:
+        logger.exception(
+            "Failed to parse transaction search query",
+            extra={"transaction_name": transaction_name, "project_id": project_id},
+        )
+        return None
+
+    # Query for issues using the search backend
+    query_kwargs = {
+        "projects": [project],
+        "date_from": start_time,
+        "date_to": end_time,
+        "search_filters": parsed_terms,
+        "sort_by": "freq",
+        "limit": 3,
+        "environments": [],
+    }
+
+    results_cursor = search.backend.query(**query_kwargs)
+    issues = list(results_cursor)
+
+    if not issues:
+        logger.info(
+            "No issues found for transaction",
+            extra={"transaction_name": transaction_name, "project_id": project_id},
+        )
+        return None
+
+    # Step 2: For each issue, get the recommended event and serialize it
+    issue_data_list = []
+    for group in issues:
+        recommended_event = group.get_recommended_event(start=start_time, end=end_time)
+        if not recommended_event:
+            recommended_event = group.get_latest_event(start=start_time, end=end_time)
+
+        if not recommended_event:
+            logger.warning(
+                "No event found for issue",
+                extra={"group_id": group.id, "transaction_name": transaction_name},
+            )
+            continue
+
+        full_event: GroupEvent = eventstore.get_event_by_id(
+            project_id=group.project_id,
+            event_id=recommended_event.event_id,
+            group_id=group.id,
+        )
+
+        serialized_event = serialize(full_event, user=None, serializer=EventSerializer())
+
+        issue_data_list.append(
+            IssueData(
+                issue_id=group.id,
+                title=group.title,
+                culprit=group.culprit,
+                transaction=full_event.transaction,
+                event_count=group.times_seen,
+                event_data=serialized_event,
+            )
+        )
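+
+        # Note: event_count uses group.times_seen, which is the group's
+        # lifetime event count, not a count scoped to the 24-hour window
+        # queried above.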
+ + if not issue_data_list: + logger.info( + "No valid issues with events found for transaction", + extra={"transaction_name": transaction_name, "project_id": project_id}, + ) + return None + + return TransactionIssues( + transaction_name=transaction_name, + project_id=project_id, + issues=issue_data_list, + ) diff --git a/src/sentry/seer/explorer/models.py b/src/sentry/seer/explorer/models.py new file mode 100644 index 00000000000000..9ed45264f01cec --- /dev/null +++ b/src/sentry/seer/explorer/models.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel + + +class Transaction(BaseModel): + name: str + project_id: int + + +class Span(BaseModel): + span_id: str + parent_span_id: str | None + span_op: str | None + span_description: str | None + + +class TraceData(BaseModel): + trace_id: str + project_id: int + transaction_name: str + total_spans: int + spans: list[Span] + + +class ExecutionTreeNode(BaseModel): + function: str + module: str + filename: str + lineno: int + in_app: bool + children: list[ExecutionTreeNode] + node_id: str | None = None + sample_count: int = 0 + first_seen_ns: int | None = None + last_seen_ns: int | None = None + duration_ns: int | None = None + + +class ProfileData(BaseModel): + profile_id: str + span_id: str + transaction_name: str | None + execution_tree: list[ExecutionTreeNode] + project_id: int + + +class TraceProfiles(BaseModel): + trace_id: str + project_id: int + profiles: list[ProfileData] + + +class IssueData(BaseModel): + issue_id: int + title: str + culprit: str | None + transaction: str | None + event_count: int + event_data: dict[str, Any] # The recommended event data + + +class TransactionIssues(BaseModel): + transaction_name: str + project_id: int + issues: list[IssueData] diff --git a/src/sentry/seer/explorer/utils.py b/src/sentry/seer/explorer/utils.py new file mode 100644 index 00000000000000..5773a182d03757 --- /dev/null +++ b/src/sentry/seer/explorer/utils.py @@ -0,0 +1,66 @@ +import re + +from sentry.seer.autofix.autofix import _convert_profile_to_execution_tree +from sentry.seer.explorer.models import ExecutionTreeNode + + +def normalize_description(description: str) -> str: + """ + Normalize span descriptions by removing UUIDs, long numeric strings, + and other variable identifiers to enable aggregation. + """ + if not description: + return "" + + # Remove UUIDs (32 hex chars with or without dashes) + description = re.sub( + r"\b[a-f0-9]{8}-?[a-f0-9]{4}-?[a-f0-9]{4}-?[a-f0-9]{4}-?[a-f0-9]{12}\b", + "", + description, + flags=re.IGNORECASE, + ) + + # Remove long numeric sequences (6+ digits) + description = re.sub(r"\b\d{6,}\b", "", description) + + # Remove hex strings (8+ hex chars) + description = re.sub(r"0x[a-f0-9]{8,}\b", "0x", description, flags=re.IGNORECASE) + description = re.sub(r"\b[a-f0-9]{8,}\b", "", description, flags=re.IGNORECASE) + + # Remove timestamps + description = re.sub(r"\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}", "", description) + + # Clean up extra whitespace + description = re.sub(r"\s+", " ", description).strip() + + return description + + +def convert_profile_to_execution_tree(profile_data: dict) -> list[ExecutionTreeNode]: + """ + Converts profile data into a hierarchical representation of code execution, + including only items from the MainThread and app frames. + Calculates accurate durations for all nodes based on call stack transitions. 
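+
+    This is a thin wrapper over autofix's ``_convert_profile_to_execution_tree``:
+    it delegates tree construction and only re-wraps each dict node as an
+    ``ExecutionTreeNode`` model.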
+ """ + # Use the autofix implementation to get dict-based results + dict_tree = _convert_profile_to_execution_tree(profile_data) + + def dict_to_execution_tree_node(node_dict: dict) -> ExecutionTreeNode: + """Convert a dict node to an ExecutionTreeNode Pydantic object.""" + children = [dict_to_execution_tree_node(child) for child in node_dict.get("children", [])] + + return ExecutionTreeNode( + function=node_dict.get("function", ""), + module=node_dict.get("module", ""), + filename=node_dict.get("filename", ""), + lineno=node_dict.get("lineno", 0), + in_app=node_dict.get("in_app", False), + children=children, + node_id=node_dict.get("node_id"), + sample_count=node_dict.get("sample_count", 0), + first_seen_ns=node_dict.get("first_seen_ns"), + last_seen_ns=node_dict.get("last_seen_ns"), + duration_ns=node_dict.get("duration_ns"), + ) + + return [dict_to_execution_tree_node(node) for node in dict_tree] diff --git a/tests/sentry/seer/explorer/test_index_data.py b/tests/sentry/seer/explorer/test_index_data.py new file mode 100644 index 00000000000000..cd918a7a7489fd --- /dev/null +++ b/tests/sentry/seer/explorer/test_index_data.py @@ -0,0 +1,371 @@ +import uuid +from datetime import timedelta +from unittest import mock + +import orjson + +from sentry.search.snuba.backend import EventsDatasetSnubaSearchBackend +from sentry.seer.explorer.index_data import ( + get_issues_for_transaction, + get_profiles_for_trace, + get_trace_for_transaction, + get_transactions_for_project, +) +from sentry.seer.explorer.models import ExecutionTreeNode +from sentry.testutils.cases import APITransactionTestCase, SnubaTestCase, SpanTestCase +from sentry.testutils.helpers.datetime import before_now +from tests.snuba.search.test_backend import SharedSnubaMixin + + +class TestGetTransactionsForProject(APITransactionTestCase, SnubaTestCase, SpanTestCase): + def setUp(self): + super().setUp() + self.ten_mins_ago = before_now(minutes=10) + + def test_get_transactions_for_project(self): + """Test the full end-to-end happy path for get_transactions_for_project.""" + # Create spans for different transactions with varying volumes + transactions_data = [ + ("api/users/profile", 5), # High volume + ("api/posts/create", 3), # Medium volume + ("api/health", 1), # Low volume + ] + + # Store transaction spans with different volumes + spans = [] + for transaction_name, count in transactions_data: + for i in range(count): + span = self.create_span( + { + "description": f"transaction-span-{i}", + "sentry_tags": {"transaction": transaction_name}, + "is_segment": True, # This marks it as a transaction span + }, + start_ts=self.ten_mins_ago + timedelta(minutes=i), + ) + spans.append(span) + + # Also add some non-transaction spans that should be ignored + if i < 2: # Add 2 non-transaction spans per transaction + non_tx_span = self.create_span( + { + "description": f"regular-span-{i}", + "sentry_tags": {"transaction": transaction_name}, + "is_segment": False, # This marks it as a regular span + }, + start_ts=self.ten_mins_ago + timedelta(minutes=i, seconds=30), + ) + spans.append(non_tx_span) + + self.store_spans(spans, is_eap=True) + + # Call our function + result = get_transactions_for_project(self.project.id) + + # Verify basic structure and data + assert len(result) == 3 + + # Should be sorted by volume (count) descending - only transaction spans count + transaction_names = [t.name for t in result] + assert transaction_names[0] == "api/users/profile" # Highest count (5 transaction spans) + assert transaction_names[1] == 
"api/posts/create" # Medium count (3 transaction spans) + assert transaction_names[2] == "api/health" # Lowest count (1 transaction span) + + # Verify all transactions have correct project_id and structure + for transaction in result: + assert transaction.project_id == self.project.id + assert hasattr(transaction, "name") + assert isinstance(transaction.name, str) + assert len(transaction.name) > 0 + + def test_get_trace_for_transaction(self): + transaction_name = "api/users/profile" + + # Create multiple traces with different span counts + traces_data = [ + (2, "trace-small"), # 2 spans - smallest + (5, "trace-medium"), # 5 spans - median + (8, "trace-large"), # 8 spans - largest + ] + + spans = [] + trace_ids = [] + + for span_count, trace_suffix in traces_data: + # Generate a unique trace ID + trace_id = uuid.uuid4().hex + trace_ids.append(trace_id) + + for i in range(span_count): + # Create spans for this trace + span = self.create_span( + { + "description": f"span-{i}-{trace_suffix}", + "sentry_tags": {"transaction": transaction_name}, + "trace_id": trace_id, + "parent_span_id": None if i == 0 else f"parent-{i-1}", + "is_segment": i == 0, # First span is the transaction span + }, + start_ts=self.ten_mins_ago + timedelta(minutes=i), + ) + spans.append(span) + + self.store_spans(spans, is_eap=True) + + # Call our function + result = get_trace_for_transaction(transaction_name, self.project.id) + + # Verify basic structure + assert result is not None + assert result.transaction_name == transaction_name + assert result.project_id == self.project.id + assert result.trace_id in trace_ids + + # Should choose the median trace (5 spans) - middle of [2, 5, 8] + assert result.total_spans == 5 + assert len(result.spans) == 5 + + # Verify all spans have correct structure and belong to the chosen trace + for span in result.spans: + assert hasattr(span, "span_id") + assert hasattr(span, "span_description") + assert hasattr(span, "parent_span_id") + assert hasattr(span, "span_op") + assert span.span_description.startswith("span-") + assert "trace-medium" in span.span_description # Should be from the median trace + + # Verify parent-child relationships are preserved + root_spans = [s for s in result.spans if s.parent_span_id is None] + assert len(root_spans) == 1 # Should have exactly one root span + + +class TestGetProfilesForTrace(APITransactionTestCase, SnubaTestCase, SpanTestCase): + def setUp(self): + super().setUp() + self.ten_mins_ago = before_now(minutes=10) + + def test_get_profiles_for_trace(self): + """Test the full end-to-end happy path for get_profiles_for_trace.""" + trace_id = "a" * 32 # Valid 32-char hex trace ID + + # Create spans and then update with profile data + span1_id = "a" * 16 # Valid 16-char hex string + span2_id = "b" * 16 # Valid 16-char hex string + profile1_id = uuid.uuid4().hex + profile2_id = uuid.uuid4().hex + + # Create spans first, then update with profile_id + span1 = self.create_span( + { + "span_id": span1_id, + "trace_id": trace_id, + "description": "GET /api/users/profile", + "sentry_tags": {"transaction": "api/users/profile", "op": "http.server"}, + "is_segment": True, + }, + start_ts=self.ten_mins_ago, + ) + span1.update({"profile_id": profile1_id}) + + span2 = self.create_span( + { + "span_id": span2_id, + "trace_id": trace_id, + "parent_span_id": span1_id, + "description": "SELECT * FROM users", + "sentry_tags": {"transaction": "api/users/profile", "op": "db.query"}, + "is_segment": False, + }, + start_ts=self.ten_mins_ago + timedelta(milliseconds=10), + ) + 
span2.update({"profile_id": profile2_id}) + + self.store_spans([span1, span2], is_eap=True) + + # Mock the profile service and tree conversion + with ( + mock.patch( + "sentry.seer.explorer.index_data.get_from_profiling_service" + ) as mock_service, + mock.patch( + "sentry.seer.explorer.index_data.convert_profile_to_execution_tree" + ) as mock_convert, + ): + + # Mock profile service responses + mock_response1 = mock.Mock() + mock_response1.status = 200 + mock_response1.data = orjson.dumps({"profile": "data1"}) + + mock_response2 = mock.Mock() + mock_response2.status = 200 + mock_response2.data = orjson.dumps({"profile": "data2"}) + + mock_service.side_effect = [mock_response1, mock_response2] + + # Mock execution tree conversion + mock_tree1 = [ + ExecutionTreeNode( + function="main", + module="app", + filename="main.py", + lineno=10, + in_app=True, + children=[], + node_id="node1", + sample_count=5, + ) + ] + mock_tree2 = [ + ExecutionTreeNode( + function="query", + module="db", + filename="db.py", + lineno=20, + in_app=True, + children=[], + node_id="node2", + sample_count=3, + ) + ] + mock_convert.side_effect = [mock_tree1, mock_tree2] + + # Call the function + result = get_profiles_for_trace(trace_id, self.project.id) + + # Verify the result + assert result is not None + assert result.trace_id == trace_id + assert result.project_id == self.project.id + assert len(result.profiles) == 2 + + # Check first profile + profile1 = result.profiles[0] + assert profile1.profile_id == profile1_id + assert profile1.span_id == span1_id + assert profile1.transaction_name == "api/users/profile" + assert profile1.execution_tree == mock_tree1 + assert profile1.project_id == self.project.id + + # Check second profile + profile2 = result.profiles[1] + assert profile2.profile_id == profile2_id + assert profile2.span_id == span2_id + assert profile2.transaction_name == "api/users/profile" + assert profile2.execution_tree == mock_tree2 + assert profile2.project_id == self.project.id + + # Verify service calls + assert mock_service.call_count == 2 + mock_service.assert_any_call( + "GET", + f"/organizations/{self.organization.id}/projects/{self.project.id}/profiles/{profile1_id}", + params={"format": "sample"}, + ) + mock_service.assert_any_call( + "GET", + f"/organizations/{self.organization.id}/projects/{self.project.id}/profiles/{profile2_id}", + params={"format": "sample"}, + ) + + # Verify conversion calls + assert mock_convert.call_count == 2 + mock_convert.assert_any_call({"profile": "data1"}) + mock_convert.assert_any_call({"profile": "data2"}) + + +class TestGetIssuesForTransaction(APITransactionTestCase, SpanTestCase, SharedSnubaMixin): + @property + def backend(self): + return EventsDatasetSnubaSearchBackend() + + def setUp(self): + super().setUp() + self.ten_mins_ago = before_now(minutes=10) + + def test_get_issues_for_transaction(self): + """Test the full end-to-end happy path for get_issues_for_transaction.""" + transaction_name = "api/users/profile" + + # Create some real events/issues for the transaction + # For error events, transaction should be stored as a tag + event1 = self.store_event( + data={ + "message": "Database connection failed", + "tags": [["transaction", transaction_name]], + "fingerprint": ["database-error"], + "platform": "python", + "timestamp": self.ten_mins_ago.isoformat(), + "level": "error", + }, + project_id=self.project.id, + ) + + event2 = self.store_event( + data={ + "message": "Timeout error", + "tags": [["transaction", transaction_name]], + "fingerprint": 
["timeout-error"], + "platform": "python", + "timestamp": self.ten_mins_ago.isoformat(), + "level": "error", + }, + project_id=self.project.id, + ) + + event3 = self.store_event( + data={ + "message": "Permission denied", + "tags": [["transaction", transaction_name]], + "fingerprint": ["permission-error"], + "platform": "python", + "timestamp": self.ten_mins_ago.isoformat(), + "level": "error", + }, + project_id=self.project.id, + ) + + # Since search backend indexing doesn't work reliably in tests, + # let's mock the search backend to return our created groups + groups = [event1.group, event2.group, event3.group] + + # Verify transaction tags are set correctly + for group in groups: + latest_event = group.get_latest_event() + transaction_tag = latest_event.get_tag("transaction") + assert ( + transaction_tag == transaction_name + ), f"Expected transaction tag '{transaction_name}', got '{transaction_tag}'" + + # Mock the search backend to return our groups + with mock.patch("sentry.seer.explorer.index_data.search.backend.query") as mock_search: + mock_search.return_value = iter(groups) + + # Call the function + result = get_issues_for_transaction(transaction_name, self.project.id) + + # Verify the result + assert result is not None + assert result.transaction_name == transaction_name + assert result.project_id == self.project.id + assert len(result.issues) == 3 + + # Get the issues and sort them by ID for consistent ordering + issues = sorted(result.issues, key=lambda x: x.issue_id) + sorted_groups = sorted(groups, key=lambda x: x.id) + + # Check each issue matches the corresponding group + for i, (issue, group) in enumerate(zip(issues, sorted_groups)): + assert issue.issue_id == group.id + assert issue.title == group.title + assert issue.culprit == group.culprit + # transaction field in issue should come from the event tags + assert issue.transaction == transaction_name + assert issue.event_count == group.times_seen + assert "id" in issue.event_data + assert "message" in issue.event_data + # Check that the event has the transaction in its tags or serialized data + assert ( + "tags" in issue.event_data + or issue.event_data.get("transaction") == transaction_name + ) diff --git a/tests/sentry/seer/explorer/test_utils.py b/tests/sentry/seer/explorer/test_utils.py new file mode 100644 index 00000000000000..daae9c83b027ae --- /dev/null +++ b/tests/sentry/seer/explorer/test_utils.py @@ -0,0 +1,559 @@ +from sentry.seer.explorer.utils import convert_profile_to_execution_tree, normalize_description + + +class TestNormalizeDescription: + """Test cases for the normalize_description utility function.""" + + def test_normalize_description_basic(self): + """Test basic functionality without any special patterns.""" + result = normalize_description("simple description") + assert result == "simple description" + + def test_normalize_description_empty_string(self): + """Test with empty string.""" + result = normalize_description("") + assert result == "" + + def test_normalize_description_none_input(self): + """Test with None input.""" + result = normalize_description(None) + assert result == "" + + def test_normalize_description_uuid_with_dashes(self): + """Test UUID normalization with dashes.""" + result = normalize_description( + "GET /api/users/123e4567-e89b-12d3-a456-426614174000/profile" + ) + assert result == "GET /api/users//profile" + + def test_normalize_description_uuid_without_dashes(self): + """Test UUID normalization without dashes.""" + result = normalize_description("Query 
123e4567e89b12d3a456426614174000 from cache") + assert result == "Query from cache" + + def test_normalize_description_multiple_uuids(self): + """Test with multiple UUIDs.""" + result = normalize_description( + "Transfer 123e4567-e89b-12d3-a456-426614174000 to 987fcdeb-51a2-43d7-8965-123456789abc" + ) + assert result == "Transfer to " + + def test_normalize_description_long_numbers(self): + """Test long numeric sequence normalization.""" + result = normalize_description("Process transaction 1234567890 with amount 999999") + assert result == "Process transaction with amount " + + def test_normalize_description_short_numbers_preserved(self): + """Test that short numbers are preserved.""" + result = normalize_description("GET /api/v1/users/123") + assert result == "GET /api/v1/users/123" + + def test_normalize_description_hex_strings_with_0x_prefix(self): + """Test hex string normalization with 0x prefix.""" + result = normalize_description("Memory address 0x1a2b3c4d5e6f7890 allocated") + assert result == "Memory address 0x allocated" + + def test_normalize_description_hex_strings_without_prefix(self): + """Test hex string normalization without prefix.""" + result = normalize_description("Hash abcdef123456789 calculated") + assert result == "Hash calculated" + + def test_normalize_description_short_hex_preserved(self): + """Test that short hex strings are preserved.""" + result = normalize_description("Color #ff0000 used") + assert result == "Color #ff0000 used" + + def test_normalize_description_timestamps(self): + """Test timestamp normalization.""" + result = normalize_description("Event at 2023-12-25T10:30:45 was processed") + assert result == "Event at was processed" + + def test_normalize_description_timestamp_with_space(self): + """Test timestamp with space separator.""" + result = normalize_description("Log entry 2023-12-25 10:30:45 created") + assert result == "Log entry created" + + def test_normalize_description_whitespace_cleanup(self): + """Test whitespace cleanup.""" + result = normalize_description(" Multiple spaces here ") + assert result == "Multiple spaces here" + + def test_normalize_description_complex_combination(self): + """Test complex case with multiple patterns.""" + result = normalize_description( + "Process 123e4567-e89b-12d3-a456-426614174000 at 2023-12-25T10:30:45 " + "with ID 1234567890 and hash 0xabcdef123456789" + ) + assert result == "Process at with ID and hash 0x" + + def test_normalize_description_sql_query(self): + """Test with SQL query containing IDs.""" + result = normalize_description( + "SELECT * FROM users WHERE id = 1234567890 AND uuid = '123e4567-e89b-12d3-a456-426614174000'" + ) + assert result == "SELECT * FROM users WHERE id = AND uuid = ''" + + def test_normalize_description_api_path(self): + """Test with API path containing various IDs.""" + result = normalize_description( + "POST /api/v2/organizations/1234567/projects/123e4567-e89b-12d3-a456-426614174000/events" + ) + assert result == "POST /api/v2/organizations//projects//events" + + +class TestConvertProfileToExecutionTree: + """Test cases for the convert_profile_to_execution_tree utility function.""" + + def test_convert_profile_empty_input(self): + """Test with empty profile data.""" + result = convert_profile_to_execution_tree({}) + assert result == [] + + def test_convert_profile_no_profile_key(self): + """Test with missing profile key.""" + result = convert_profile_to_execution_tree({"other": "data"}) + assert result == [] + + def test_convert_profile_missing_required_fields(self): + 
"""Test with missing required fields in profile.""" + profile_data = {"profile": {"frames": []}} + result = convert_profile_to_execution_tree(profile_data) + assert result == [] + + profile_data = {"profile": {"frames": [], "stacks": []}} + result = convert_profile_to_execution_tree(profile_data) + assert result == [] + + def test_convert_profile_empty_profile_data(self): + """Test with empty but valid profile structure.""" + profile_data = { + "profile": { + "frames": [], + "stacks": [], + "samples": [], + "thread_metadata": {}, + } + } + result = convert_profile_to_execution_tree(profile_data) + assert result == [] + + def test_convert_profile_single_frame_single_sample(self): + """Test with minimal valid profile data.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "main", + "module": "app", + "filename": "main.py", + "lineno": 10, + "in_app": True, + } + ], + "stacks": [[0]], # Stack with frame index 0 + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "1", + "stack_id": 0, + } + ], + "thread_metadata": {"1": {"name": "MainThread"}}, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + root = result[0] + assert root.function == "main" + assert root.module == "app" + assert root.filename == "main.py" + assert root.lineno == 10 + assert root.in_app is True + assert root.sample_count == 1 + assert root.first_seen_ns == 1000000 + assert root.last_seen_ns == 1000000 + assert root.children == [] + assert root.node_id is not None + + def test_convert_profile_nested_call_stack(self): + """Test with nested call stack.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "main", + "module": "app", + "filename": "main.py", + "lineno": 10, + "in_app": True, + }, + { + "function": "process_data", + "module": "app.utils", + "filename": "utils.py", + "lineno": 25, + "in_app": True, + }, + { + "function": "validate_input", + "module": "app.validators", + "filename": "validators.py", + "lineno": 15, + "in_app": True, + }, + ], + "stacks": [[0, 1, 2]], # All frames in one stack + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "1", + "stack_id": 0, + } + ], + "thread_metadata": {"1": {"name": "MainThread"}}, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + root = result[0] + assert root.function == "validate_input" + assert len(root.children) == 1 + + child1 = root.children[0] + assert child1.function == "process_data" + assert len(child1.children) == 1 + + child2 = child1.children[0] + assert child2.function == "main" + assert len(child2.children) == 0 + + def test_convert_profile_multiple_samples_duration_calculation(self): + """Test duration calculation with multiple samples.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "main", + "module": "app", + "filename": "main.py", + "lineno": 10, + "in_app": True, + } + ], + "stacks": [[0]], + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "1", + "stack_id": 0, + }, + { + "elapsed_since_start_ns": 2000000, + "thread_id": "1", + "stack_id": 0, + }, + { + "elapsed_since_start_ns": 3000000, + "thread_id": "1", + "stack_id": 0, + }, + ], + "thread_metadata": {"1": {"name": "MainThread"}}, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + root = result[0] + assert root.sample_count == 3 + assert root.first_seen_ns == 1000000 + assert root.last_seen_ns == 3000000 + assert root.duration_ns is not None + 
assert root.duration_ns > 0 + + def test_convert_profile_filters_non_app_frames(self): + """Test that non-app frames are filtered out.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "app_function", + "module": "app", + "filename": "app.py", + "lineno": 10, + "in_app": True, + }, + { + "function": "stdlib_function", + "module": "stdlib", + "filename": "/usr/lib/python/stdlib.py", + "lineno": 100, + "in_app": False, + }, + { + "function": "another_app_function", + "module": "app", + "filename": "app.py", + "lineno": 20, + "in_app": True, + }, + ], + "stacks": [[0, 1, 2]], # Mixed app and non-app frames + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "1", + "stack_id": 0, + } + ], + "thread_metadata": {"1": {"name": "MainThread"}}, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + # Should only have app frames, stdlib_function should be filtered out + root = result[0] + assert root.function == "another_app_function" + assert len(root.children) == 1 + assert root.children[0].function == "app_function" + + def test_convert_profile_filters_generated_frames(self): + """Test that generated frames (with ) are filtered out.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "app_function", + "module": "app", + "filename": "app.py", + "lineno": 10, + "in_app": True, + }, + { + "function": "generated_function", + "module": "generated", + "filename": "", + "lineno": 1, + "in_app": True, # Even though in_app=True, should be filtered + }, + ], + "stacks": [[0, 1]], + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "1", + "stack_id": 0, + } + ], + "thread_metadata": {"1": {"name": "MainThread"}}, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + # Should only have the app function, generated function should be filtered + root = result[0] + assert root.function == "app_function" + assert len(root.children) == 0 + + def test_convert_profile_single_thread_fallback(self): + """Test fallback to single thread when no MainThread is found.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "main", + "module": "app", + "filename": "main.py", + "lineno": 10, + "in_app": True, + } + ], + "stacks": [[0]], + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "worker1", + "stack_id": 0, + } + ], + "thread_metadata": {"worker1": {"name": "WorkerThread"}}, # No MainThread + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + assert result[0].function == "main" + + def test_convert_profile_ignores_other_threads(self): + """Test that samples from other threads are ignored.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "main_function", + "module": "app", + "filename": "main.py", + "lineno": 10, + "in_app": True, + }, + { + "function": "worker_function", + "module": "app", + "filename": "worker.py", + "lineno": 20, + "in_app": True, + }, + ], + "stacks": [[0], [1]], + "samples": [ + { + "elapsed_since_start_ns": 1000000, + "thread_id": "1", # MainThread + "stack_id": 0, + }, + { + "elapsed_since_start_ns": 1000000, + "thread_id": "2", # Other thread - should be ignored + "stack_id": 1, + }, + ], + "thread_metadata": { + "1": {"name": "MainThread"}, + "2": {"name": "WorkerThread"}, + }, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 1 + + # Should only contain the main function from MainThread + root = 
result[0] + assert root.function == "main_function" + + def test_convert_profile_complex_call_patterns(self): + """Test complex call patterns with function entries and exits.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "main", + "module": "app", + "filename": "main.py", + "lineno": 10, + "in_app": True, + }, + { + "function": "helper_a", + "module": "app", + "filename": "helpers.py", + "lineno": 15, + "in_app": True, + }, + { + "function": "helper_b", + "module": "app", + "filename": "helpers.py", + "lineno": 25, + "in_app": True, + }, + ], + "stacks": [ + [0], # main only + [0, 1], # main -> helper_a + [0, 1, 2], # main -> helper_a -> helper_b + [0, 1], # main -> helper_a (helper_b returned) + [0], # main only (helper_a returned) + ], + "samples": [ + {"elapsed_since_start_ns": 1000000, "thread_id": "1", "stack_id": 0}, + {"elapsed_since_start_ns": 2000000, "thread_id": "1", "stack_id": 1}, + {"elapsed_since_start_ns": 3000000, "thread_id": "1", "stack_id": 2}, + {"elapsed_since_start_ns": 4000000, "thread_id": "1", "stack_id": 3}, + {"elapsed_since_start_ns": 5000000, "thread_id": "1", "stack_id": 4}, + ], + "thread_metadata": {"1": {"name": "MainThread"}}, + } + } + + result = convert_profile_to_execution_tree(profile_data) + # Each unique deepest function becomes a root, so we get 3 roots: + # main (from samples 1 and 5), helper_a (from samples 2 and 4), helper_b (from sample 3) + assert len(result) == 3 + + # Find functions by name + roots_by_function = {node.function: node for node in result} + + main_root = roots_by_function["main"] + assert main_root.sample_count == 2 # Present in samples 1 and 5 + + helper_a_root = roots_by_function["helper_a"] + assert helper_a_root.sample_count == 2 # Present in samples 2 and 4 + assert len(helper_a_root.children) == 1 + assert helper_a_root.children[0].function == "main" + + helper_b_root = roots_by_function["helper_b"] + assert helper_b_root.sample_count == 1 # Present in sample 3 + assert len(helper_b_root.children) == 1 + assert helper_b_root.children[0].function == "helper_a" + + def test_convert_profile_duration_calculation_accuracy(self): + """Test that duration calculations are reasonable.""" + profile_data = { + "profile": { + "frames": [ + { + "function": "short_function", + "module": "app", + "filename": "app.py", + "lineno": 10, + "in_app": True, + }, + { + "function": "long_function", + "module": "app", + "filename": "app.py", + "lineno": 20, + "in_app": True, + }, + ], + "stacks": [ + [0], # short_function + [1], # long_function starts + [1], # long_function continues + [1], # long_function continues + ], + "samples": [ + {"elapsed_since_start_ns": 1000000, "thread_id": "1", "stack_id": 0}, + {"elapsed_since_start_ns": 2000000, "thread_id": "1", "stack_id": 1}, + {"elapsed_since_start_ns": 3000000, "thread_id": "1", "stack_id": 2}, + {"elapsed_since_start_ns": 4000000, "thread_id": "1", "stack_id": 3}, + ], + "thread_metadata": {"1": {"name": "MainThread"}}, + } + } + + result = convert_profile_to_execution_tree(profile_data) + assert len(result) == 2 + + # Find functions by name + short_func = next(node for node in result if node.function == "short_function") + long_func = next(node for node in result if node.function == "long_function") + + # long_function should have longer duration than short_function + assert short_func.duration_ns is not None + assert long_func.duration_ns is not None + assert long_func.duration_ns > short_func.duration_ns + + # Verify sample counts + assert short_func.sample_count == 
1 + assert long_func.sample_count == 3 From b7bbf7747c9c67f970701527120bbb91cf15d52f Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Fri, 18 Jul 2025 14:34:07 -0700 Subject: [PATCH 2/8] Use real transaction name --- src/sentry/seer/explorer/index_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/seer/explorer/index_data.py b/src/sentry/seer/explorer/index_data.py index d580291f0480b5..bd630b09939b6a 100644 --- a/src/sentry/seer/explorer/index_data.py +++ b/src/sentry/seer/explorer/index_data.py @@ -88,7 +88,7 @@ def get_transactions_for_project(project_id: int) -> list[Transaction]: seen_names.add(normalized_name) transactions.append( Transaction( - name=normalized_name, + name=name, # Use original name, not normalized project_id=project_id, ) ) From e4a06246595caf5d88a7c49ab7b964b67e156903 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Fri, 18 Jul 2025 14:37:03 -0700 Subject: [PATCH 3/8] None checks --- src/sentry/seer/explorer/index_data.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/sentry/seer/explorer/index_data.py b/src/sentry/seer/explorer/index_data.py index bd630b09939b6a..6f8dbf8a471cbd 100644 --- a/src/sentry/seer/explorer/index_data.py +++ b/src/sentry/seer/explorer/index_data.py @@ -82,6 +82,9 @@ def get_transactions_for_project(project_id: int) -> list[Transaction]: seen_names = set() for row in result.get("data", []): name = row.get("transaction") + if not name: + continue + normalized_name = normalize_description(name) if normalized_name in seen_names: continue @@ -438,12 +441,19 @@ def get_issues_for_transaction(transaction_name: str, project_id: int) -> Transa ) continue - full_event: GroupEvent = eventstore.get_event_by_id( + full_event: GroupEvent | None = eventstore.get_event_by_id( project_id=group.project_id, event_id=recommended_event.event_id, group_id=group.id, ) + if not full_event: + logger.warning( + "No event found for issue", + extra={"group_id": group.id, "transaction_name": transaction_name}, + ) + continue + serialized_event = serialize(full_event, user=None, serializer=EventSerializer()) issue_data_list.append( From 42893012d0bfeda0088ca337e69689be511eac32 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Fri, 18 Jul 2025 15:17:43 -0700 Subject: [PATCH 4/8] Empty commit From 807630e302cb1ea62b55a2481cee47972ad0dbac Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Fri, 18 Jul 2025 17:33:43 -0700 Subject: [PATCH 5/8] Rename test file --- .../seer/explorer/{test_utils.py => test_explorer_utils.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/sentry/seer/explorer/{test_utils.py => test_explorer_utils.py} (100%) diff --git a/tests/sentry/seer/explorer/test_utils.py b/tests/sentry/seer/explorer/test_explorer_utils.py similarity index 100% rename from tests/sentry/seer/explorer/test_utils.py rename to tests/sentry/seer/explorer/test_explorer_utils.py From d9de7ac281b13a6cff828064d0a2da968a47d0c3 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Fri, 18 Jul 2025 18:24:17 -0700 Subject: [PATCH 6/8] Fix mypy --- src/sentry/seer/endpoints/seer_rpc.py | 16 ++--- src/sentry/seer/explorer/index_data.py | 63 ++++++++++++------- .../seer/explorer/test_explorer_utils.py | 29 ++++----- tests/sentry/seer/explorer/test_index_data.py | 15 ++--- 4 files changed, 69 insertions(+), 54 deletions(-) diff --git a/src/sentry/seer/endpoints/seer_rpc.py b/src/sentry/seer/endpoints/seer_rpc.py index 6018bd057645f2..a9299bba1b6db5 100644 --- a/src/sentry/seer/endpoints/seer_rpc.py 
+++ b/src/sentry/seer/endpoints/seer_rpc.py @@ -56,10 +56,10 @@ from sentry.search.events.types import SnubaParams from sentry.seer.autofix.autofix_tools import get_error_event_details, get_profile_details from sentry.seer.explorer.index_data import ( - get_issues_for_transaction, - get_profiles_for_trace, - get_trace_for_transaction, - get_transactions_for_project, + rpc_get_issues_for_transaction, + rpc_get_profiles_for_trace, + rpc_get_trace_for_transaction, + rpc_get_transactions_for_project, ) from sentry.seer.fetch_issues.fetch_issues import ( get_issues_related_to_file_patches, @@ -563,10 +563,10 @@ def get_github_enterprise_integration_config( "get_attribute_names": get_attribute_names, "get_attribute_values_with_substring": get_attribute_values_with_substring, "get_attributes_and_values": get_attributes_and_values, - "get_transactions_for_project": get_transactions_for_project, - "get_trace_for_transaction": get_trace_for_transaction, - "get_profiles_for_trace": get_profiles_for_trace, - "get_issues_for_transaction": get_issues_for_transaction, + "get_transactions_for_project": rpc_get_transactions_for_project, + "get_trace_for_transaction": rpc_get_trace_for_transaction, + "get_profiles_for_trace": rpc_get_profiles_for_trace, + "get_issues_for_transaction": rpc_get_issues_for_transaction, "get_github_enterprise_integration_config": get_github_enterprise_integration_config, } diff --git a/src/sentry/seer/explorer/index_data.py b/src/sentry/seer/explorer/index_data.py index 6f8dbf8a471cbd..287b3551f9fbcf 100644 --- a/src/sentry/seer/explorer/index_data.py +++ b/src/sentry/seer/explorer/index_data.py @@ -5,11 +5,12 @@ import orjson from sentry import search -from sentry.api.event_search import parse_search_query +from sentry.api.event_search import SearchFilter, parse_search_query +from sentry.api.issue_search import convert_query_values from sentry.api.serializers.base import serialize from sentry.api.serializers.models.event import EventSerializer from sentry.eventstore import backend as eventstore -from sentry.eventstore.models import GroupEvent +from sentry.eventstore.models import Event, GroupEvent from sentry.models.project import Project from sentry.profiles.utils import get_from_profiling_service from sentry.search.eap.types import SearchResolverConfig @@ -397,27 +398,19 @@ def get_issues_for_transaction(transaction_name: str, project_id: int) -> Transa start_time = end_time - timedelta(hours=24) # Step 1: Search for issues using transaction filter - try: - parsed_terms = parse_search_query(f'transaction:"{transaction_name}"') - except Exception: - logger.exception( - "Failed to parse transaction search query", - extra={"transaction_name": transaction_name, "project_id": project_id}, - ) - return None + parsed_terms = parse_search_query(f'transaction:"{transaction_name}"') + converted_terms = convert_query_values(parsed_terms, [project], None, []) + search_filters = [term for term in converted_terms if isinstance(term, SearchFilter)] - # Query for issues using the search backend - query_kwargs = { - "projects": [project], - "date_from": start_time, - "date_to": end_time, - "search_filters": parsed_terms, - "sort_by": "freq", - "limit": 3, - "environments": [], - } - - results_cursor = search.backend.query(**query_kwargs) + results_cursor = search.backend.query( + projects=[project], + date_from=start_time, + date_to=end_time, + search_filters=search_filters, + sort_by="freq", + limit=3, + environments=[], + ) issues = list(results_cursor) if not issues: @@ -441,7 +434,7 @@ 
def get_issues_for_transaction(transaction_name: str, project_id: int) -> Transa ) continue - full_event: GroupEvent | None = eventstore.get_event_by_id( + full_event: Event | GroupEvent | None = eventstore.get_event_by_id( project_id=group.project_id, event_id=recommended_event.event_id, group_id=group.id, @@ -479,3 +472,27 @@ def get_issues_for_transaction(transaction_name: str, project_id: int) -> Transa project_id=project_id, issues=issue_data_list, ) + + +# RPC wrappers + + +def rpc_get_transactions_for_project(project_id: int) -> dict[str, Any]: + transactions = get_transactions_for_project(project_id) + transaction_dicts = [transaction.dict() for transaction in transactions] + return {"transactions": transaction_dicts} + + +def rpc_get_trace_for_transaction(transaction_name: str, project_id: int) -> dict[str, Any]: + trace = get_trace_for_transaction(transaction_name, project_id) + return trace.dict() if trace else {} + + +def rpc_get_profiles_for_trace(trace_id: str, project_id: int) -> dict[str, Any]: + profiles = get_profiles_for_trace(trace_id, project_id) + return profiles.dict() if profiles else {} + + +def rpc_get_issues_for_transaction(transaction_name: str, project_id: int) -> dict[str, Any]: + issues = get_issues_for_transaction(transaction_name, project_id) + return issues.dict() if issues else {} diff --git a/tests/sentry/seer/explorer/test_explorer_utils.py b/tests/sentry/seer/explorer/test_explorer_utils.py index daae9c83b027ae..413639f7be9ba9 100644 --- a/tests/sentry/seer/explorer/test_explorer_utils.py +++ b/tests/sentry/seer/explorer/test_explorer_utils.py @@ -1,3 +1,5 @@ +from typing import Any + from sentry.seer.explorer.utils import convert_profile_to_execution_tree, normalize_description @@ -14,11 +16,6 @@ def test_normalize_description_empty_string(self): result = normalize_description("") assert result == "" - def test_normalize_description_none_input(self): - """Test with None input.""" - result = normalize_description(None) - assert result == "" - def test_normalize_description_uuid_with_dashes(self): """Test UUID normalization with dashes.""" result = normalize_description( @@ -116,7 +113,7 @@ def test_convert_profile_no_profile_key(self): def test_convert_profile_missing_required_fields(self): """Test with missing required fields in profile.""" - profile_data = {"profile": {"frames": []}} + profile_data: dict[str, Any] = {"profile": {"frames": []}} result = convert_profile_to_execution_tree(profile_data) assert result == [] @@ -126,7 +123,7 @@ def test_convert_profile_missing_required_fields(self): def test_convert_profile_empty_profile_data(self): """Test with empty but valid profile structure.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [], "stacks": [], @@ -139,7 +136,7 @@ def test_convert_profile_empty_profile_data(self): def test_convert_profile_single_frame_single_sample(self): """Test with minimal valid profile data.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { @@ -179,7 +176,7 @@ def test_convert_profile_single_frame_single_sample(self): def test_convert_profile_nested_call_stack(self): """Test with nested call stack.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { @@ -233,7 +230,7 @@ def test_convert_profile_nested_call_stack(self): def test_convert_profile_multiple_samples_duration_calculation(self): """Test duration calculation with multiple samples.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { @@ 
-278,7 +275,7 @@ def test_convert_profile_multiple_samples_duration_calculation(self): def test_convert_profile_filters_non_app_frames(self): """Test that non-app frames are filtered out.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { @@ -326,7 +323,7 @@ def test_convert_profile_filters_non_app_frames(self): def test_convert_profile_filters_generated_frames(self): """Test that generated frames (with ) are filtered out.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { @@ -366,7 +363,7 @@ def test_convert_profile_filters_generated_frames(self): def test_convert_profile_single_thread_fallback(self): """Test fallback to single thread when no MainThread is found.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { @@ -395,7 +392,7 @@ def test_convert_profile_single_thread_fallback(self): def test_convert_profile_ignores_other_threads(self): """Test that samples from other threads are ignored.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { @@ -442,7 +439,7 @@ def test_convert_profile_ignores_other_threads(self): def test_convert_profile_complex_call_patterns(self): """Test complex call patterns with function entries and exits.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { @@ -508,7 +505,7 @@ def test_convert_profile_complex_call_patterns(self): def test_convert_profile_duration_calculation_accuracy(self): """Test that duration calculations are reasonable.""" - profile_data = { + profile_data: dict[str, Any] = { "profile": { "frames": [ { diff --git a/tests/sentry/seer/explorer/test_index_data.py b/tests/sentry/seer/explorer/test_index_data.py index cd918a7a7489fd..617a366e29ee67 100644 --- a/tests/sentry/seer/explorer/test_index_data.py +++ b/tests/sentry/seer/explorer/test_index_data.py @@ -126,13 +126,14 @@ def test_get_trace_for_transaction(self): assert len(result.spans) == 5 # Verify all spans have correct structure and belong to the chosen trace - for span in result.spans: - assert hasattr(span, "span_id") - assert hasattr(span, "span_description") - assert hasattr(span, "parent_span_id") - assert hasattr(span, "span_op") - assert span.span_description.startswith("span-") - assert "trace-medium" in span.span_description # Should be from the median trace + for result_span in result.spans: + assert hasattr(result_span, "span_id") + assert hasattr(result_span, "span_description") + assert hasattr(result_span, "parent_span_id") + assert hasattr(result_span, "span_op") + assert result_span.span_description is not None + assert result_span.span_description.startswith("span-") + assert "trace-medium" in result_span.span_description # Should be from the median trace # Verify parent-child relationships are preserved root_spans = [s for s in result.spans if s.parent_span_id is None] From 0cff724d7ffbf06b798280723577f25cc2fd1512 Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Mon, 21 Jul 2025 14:39:18 -0700 Subject: [PATCH 7/8] PR feedback --- src/sentry/seer/explorer/index_data.py | 11 ++- src/sentry/seer/explorer/models.py | 7 +- src/sentry/seer/sentry_data_models.py | 72 +++++++++++++++++++ tests/sentry/seer/explorer/test_index_data.py | 11 ++- 4 files changed, 85 insertions(+), 16 deletions(-) create mode 100644 src/sentry/seer/sentry_data_models.py diff --git a/src/sentry/seer/explorer/index_data.py b/src/sentry/seer/explorer/index_data.py index 287b3551f9fbcf..812a11c1181ae3 100644 --- 
--- a/src/sentry/seer/explorer/index_data.py
+++ b/src/sentry/seer/explorer/index_data.py
@@ -15,8 +15,9 @@
 from sentry.profiles.utils import get_from_profiling_service
 from sentry.search.eap.types import SearchResolverConfig
 from sentry.search.events.types import SnubaParams
-from sentry.seer.explorer.models import (
-    IssueData,
+from sentry.seer.explorer.utils import convert_profile_to_execution_tree, normalize_description
+from sentry.seer.sentry_data_models import (
+    IssueDetails,
     ProfileData,
     Span,
     TraceData,
@@ -24,7 +25,6 @@
     Transaction,
     TransactionIssues,
 )
-from sentry.seer.explorer.utils import convert_profile_to_execution_tree, normalize_description
 from sentry.snuba import spans_rpc
 from sentry.snuba.referrer import Referrer
 
@@ -450,13 +450,12 @@ def get_issues_for_transaction(transaction_name: str, project_id: int) -> Transa
             serialized_event = serialize(full_event, user=None, serializer=EventSerializer())
 
             issue_data_list.append(
-                IssueData(
+                IssueDetails(
                     issue_id=group.id,
                     title=group.title,
                     culprit=group.culprit,
                     transaction=full_event.transaction,
-                    event_count=group.times_seen,
-                    event_data=serialized_event,
+                    events=[serialized_event],
                 )
             )
 
diff --git a/src/sentry/seer/explorer/models.py b/src/sentry/seer/explorer/models.py
index 9ed45264f01cec..803a849a81fce8 100644
--- a/src/sentry/seer/explorer/models.py
+++ b/src/sentry/seer/explorer/models.py
@@ -53,16 +53,15 @@ class TraceProfiles(BaseModel):
     profiles: list[ProfileData]
 
 
-class IssueData(BaseModel):
+class IssueDetails(BaseModel):
     issue_id: int
     title: str
     culprit: str | None
     transaction: str | None
-    event_count: int
-    event_data: dict[str, Any]  # The recommended event data
+    events: list[dict[str, Any]]  # The recommended event data
 
 
 class TransactionIssues(BaseModel):
     transaction_name: str
     project_id: int
-    issues: list[IssueData]
+    issues: list[IssueDetails]
diff --git a/src/sentry/seer/sentry_data_models.py b/src/sentry/seer/sentry_data_models.py
new file mode 100644
index 00000000000000..ffa4e8c2eafce6
--- /dev/null
+++ b/src/sentry/seer/sentry_data_models.py
@@ -0,0 +1,72 @@
+"""
+A collection of human- and LLM-friendly models to represent Sentry data like issues, traces, and profiles.
+These should be kept in sync with the models in Seer.
+""" + +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel + + +class Transaction(BaseModel): + name: str + project_id: int + + +class Span(BaseModel): + span_id: str + parent_span_id: str | None + span_op: str | None + span_description: str | None + + +class TraceData(BaseModel): + trace_id: str + project_id: int + transaction_name: str + total_spans: int + spans: list[Span] + + +class ExecutionTreeNode(BaseModel): + function: str + module: str + filename: str + lineno: int + in_app: bool + children: list[ExecutionTreeNode] + node_id: str | None = None + sample_count: int = 0 + first_seen_ns: int | None = None + last_seen_ns: int | None = None + duration_ns: int | None = None + + +class ProfileData(BaseModel): + profile_id: str + span_id: str + transaction_name: str | None + execution_tree: list[ExecutionTreeNode] + project_id: int + + +class TraceProfiles(BaseModel): + trace_id: str + project_id: int + profiles: list[ProfileData] + + +class IssueDetails(BaseModel): + issue_id: int + title: str + culprit: str | None + transaction: str | None + events: list[dict[str, Any]] + + +class TransactionIssues(BaseModel): + transaction_name: str + project_id: int + issues: list[IssueDetails] diff --git a/tests/sentry/seer/explorer/test_index_data.py b/tests/sentry/seer/explorer/test_index_data.py index 617a366e29ee67..c6e9235ea8525a 100644 --- a/tests/sentry/seer/explorer/test_index_data.py +++ b/tests/sentry/seer/explorer/test_index_data.py @@ -11,7 +11,7 @@ get_trace_for_transaction, get_transactions_for_project, ) -from sentry.seer.explorer.models import ExecutionTreeNode +from sentry.seer.sentry_data_models import ExecutionTreeNode from sentry.testutils.cases import APITransactionTestCase, SnubaTestCase, SpanTestCase from sentry.testutils.helpers.datetime import before_now from tests.snuba.search.test_backend import SharedSnubaMixin @@ -362,11 +362,10 @@ def test_get_issues_for_transaction(self): assert issue.culprit == group.culprit # transaction field in issue should come from the event tags assert issue.transaction == transaction_name - assert issue.event_count == group.times_seen - assert "id" in issue.event_data - assert "message" in issue.event_data + assert "id" in issue.events[0] + assert "message" in issue.events[0] # Check that the event has the transaction in its tags or serialized data assert ( - "tags" in issue.event_data - or issue.event_data.get("transaction") == transaction_name + "tags" in issue.events[0] + or issue.events[0].get("transaction") == transaction_name ) From dfa2fe4fe393615a8a579feb42a6d0f954ad092a Mon Sep 17 00:00:00 2001 From: Rohan Agarwal Date: Mon, 21 Jul 2025 14:44:24 -0700 Subject: [PATCH 8/8] Remove duplicate --- src/sentry/seer/explorer/models.py | 67 ------------------------------ src/sentry/seer/explorer/utils.py | 2 +- 2 files changed, 1 insertion(+), 68 deletions(-) delete mode 100644 src/sentry/seer/explorer/models.py diff --git a/src/sentry/seer/explorer/models.py b/src/sentry/seer/explorer/models.py deleted file mode 100644 index 803a849a81fce8..00000000000000 --- a/src/sentry/seer/explorer/models.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import annotations - -from typing import Any - -from pydantic import BaseModel - - -class Transaction(BaseModel): - name: str - project_id: int - - -class Span(BaseModel): - span_id: str - parent_span_id: str | None - span_op: str | None - span_description: str | None - - -class TraceData(BaseModel): - trace_id: str - project_id: int - transaction_name: str - 
-    spans: list[Span]
-
-
-class ExecutionTreeNode(BaseModel):
-    function: str
-    module: str
-    filename: str
-    lineno: int
-    in_app: bool
-    children: list[ExecutionTreeNode]
-    node_id: str | None = None
-    sample_count: int = 0
-    first_seen_ns: int | None = None
-    last_seen_ns: int | None = None
-    duration_ns: int | None = None
-
-
-class ProfileData(BaseModel):
-    profile_id: str
-    span_id: str
-    transaction_name: str | None
-    execution_tree: list[ExecutionTreeNode]
-    project_id: int
-
-
-class TraceProfiles(BaseModel):
-    trace_id: str
-    project_id: int
-    profiles: list[ProfileData]
-
-
-class IssueDetails(BaseModel):
-    issue_id: int
-    title: str
-    culprit: str | None
-    transaction: str | None
-    events: list[dict[str, Any]]  # The recommended event data
-
-
-class TransactionIssues(BaseModel):
-    transaction_name: str
-    project_id: int
-    issues: list[IssueDetails]
diff --git a/src/sentry/seer/explorer/utils.py b/src/sentry/seer/explorer/utils.py
index 5773a182d03757..4c7a7581007590 100644
--- a/src/sentry/seer/explorer/utils.py
+++ b/src/sentry/seer/explorer/utils.py
@@ -1,7 +1,7 @@
 import re
 
 from sentry.seer.autofix.autofix import _convert_profile_to_execution_tree
-from sentry.seer.explorer.models import ExecutionTreeNode
+from sentry.seer.sentry_data_models import ExecutionTreeNode
 
 
 def normalize_description(description: str) -> str: