diff --git a/ddtrace/_trace/utils_botocore/span_tags.py b/ddtrace/_trace/utils_botocore/span_tags.py index b1d9440581a..e6e4b570274 100644 --- a/ddtrace/_trace/utils_botocore/span_tags.py +++ b/ddtrace/_trace/utils_botocore/span_tags.py @@ -12,11 +12,52 @@ from ddtrace.ext import aws from ddtrace.ext import http from ddtrace.internal.constants import COMPONENT +from ddtrace.internal.serverless import in_aws_lambda from ddtrace.internal.utils.formats import deep_getattr _PAYLOAD_TAGGER = AWSPayloadTagging() +SERVICE_MAP = { + "eventbridge": "events", + "events": "events", + "sqs": "sqs", + "sns": "sns", + "kinesis": "kinesis", + "dynamodb": "dynamodb", + "dynamodbdocument": "dynamodb", +} + + +# Helper to build AWS hostname from service, region and parameters +def _derive_peer_hostname(service: str, region: str, params: Optional[Dict[str, Any]] = None) -> Optional[str]: + """Return hostname for given AWS service according to Datadog peer hostname rules. + + Only returns hostnames for specific AWS services: + - eventbridge/events -> events..amazonaws.com + - sqs -> sqs..amazonaws.com + - sns -> sns..amazonaws.com + - kinesis -> kinesis..amazonaws.com + - dynamodb -> dynamodb..amazonaws.com + - s3 -> .s3..amazonaws.com (if Bucket param present) + s3..amazonaws.com (otherwise) + + Other services return ``None``. + """ + + if not region: + return None + + aws_service = service.lower() + + if aws_service == "s3": + bucket = params.get("Bucket") if params else None + return f"{bucket}.s3.{region}.amazonaws.com" if bucket else f"s3.{region}.amazonaws.com" + + mapped = SERVICE_MAP.get(aws_service) + + return f"{mapped}.{region}.amazonaws.com" if mapped else None + def set_botocore_patched_api_call_span_tags(span: Span, instance, args, params, endpoint_name, operation): span.set_tag_str(COMPONENT, config.botocore.integration_name) @@ -51,6 +92,13 @@ def set_botocore_patched_api_call_span_tags(span: Span, instance, args, params, span.set_tag_str("aws.region", region_name) span.set_tag_str("region", region_name) + # Derive peer hostname only in serverless environments to avoid + # unnecessary tag noise in traditional hosts/containers. + if in_aws_lambda(): + hostname = _derive_peer_hostname(endpoint_name, region_name, params) + if hostname: + span.set_tag_str("peer.service", hostname) + def set_botocore_response_metadata_tags( span: Span, result: Dict[str, Any], is_error_code_fn: Optional[Callable] = None diff --git a/ddtrace/contrib/internal/aiobotocore/patch.py b/ddtrace/contrib/internal/aiobotocore/patch.py index d1eaef0f3e3..558d8e0af60 100644 --- a/ddtrace/contrib/internal/aiobotocore/patch.py +++ b/ddtrace/contrib/internal/aiobotocore/patch.py @@ -5,6 +5,7 @@ import wrapt from ddtrace import config +from ddtrace._trace.utils_botocore.span_tags import _derive_peer_hostname from ddtrace.constants import _SPAN_MEASURED_KEY from ddtrace.constants import SPAN_KIND from ddtrace.contrib.internal.trace_utils import ext_service @@ -16,6 +17,7 @@ from ddtrace.internal.constants import COMPONENT from ddtrace.internal.schema import schematize_cloud_api_operation from ddtrace.internal.schema import schematize_service_name +from ddtrace.internal.serverless import in_aws_lambda from ddtrace.internal.utils import ArgumentError from ddtrace.internal.utils import get_argument_value from ddtrace.internal.utils.formats import asbool @@ -145,6 +147,12 @@ async def _wrapped_api_call(original_func, instance, args, kwargs): region_name = deep_getattr(instance, "meta.region_name") + if in_aws_lambda(): + # Derive the peer hostname now that we have both service and region. + hostname = _derive_peer_hostname(endpoint_name, region_name, params) + if hostname: + span.set_tag_str("peer.service", hostname) + meta = { "aws.agent": "aiobotocore", "aws.operation": operation, diff --git a/ddtrace/contrib/internal/boto/patch.py b/ddtrace/contrib/internal/boto/patch.py index d68fe6e9b56..e0725a5d64d 100644 --- a/ddtrace/contrib/internal/boto/patch.py +++ b/ddtrace/contrib/internal/boto/patch.py @@ -7,6 +7,7 @@ import wrapt from ddtrace import config +from ddtrace._trace.utils_botocore.span_tags import _derive_peer_hostname from ddtrace.constants import _SPAN_MEASURED_KEY from ddtrace.constants import SPAN_KIND from ddtrace.ext import SpanKind @@ -16,6 +17,7 @@ from ddtrace.internal.constants import COMPONENT from ddtrace.internal.schema import schematize_cloud_api_operation from ddtrace.internal.schema import schematize_service_name +from ddtrace.internal.serverless import in_aws_lambda from ddtrace.internal.utils import get_argument_value from ddtrace.internal.utils.formats import asbool from ddtrace.internal.utils.wrappers import unwrap @@ -122,6 +124,12 @@ def patched_query_request(original_func, instance, args, kwargs): meta[aws.REGION] = region_name meta[aws.AWSREGION] = region_name + if in_aws_lambda(): + # Derive the peer hostname now that we have both service and region. + hostname = _derive_peer_hostname(endpoint_name, region_name, params) + if hostname: + meta["peer.service"] = hostname + span.set_tags(meta) # Original func returns a boto.connection.HTTPResponse object @@ -183,6 +191,12 @@ def patched_auth_request(original_func, instance, args, kwargs): meta[aws.REGION] = region_name meta[aws.AWSREGION] = region_name + if in_aws_lambda(): + # Derive the peer hostname + hostname = _derive_peer_hostname(endpoint_name, region_name, None) + if hostname: + meta["peer.service"] = hostname + span.set_tags(meta) # Original func returns a boto.connection.HTTPResponse object diff --git a/ddtrace/internal/schema/processor.py b/ddtrace/internal/schema/processor.py index b83a0187b0c..1cdf204279b 100644 --- a/ddtrace/internal/schema/processor.py +++ b/ddtrace/internal/schema/processor.py @@ -1,5 +1,6 @@ from ddtrace._trace.processor import TraceProcessor from ddtrace.constants import _BASE_SERVICE_KEY +from ddtrace.internal.serverless import in_aws_lambda from ddtrace.settings._config import config from . import schematize_service_name @@ -8,10 +9,13 @@ class BaseServiceProcessor(TraceProcessor): def __init__(self): self._global_service = schematize_service_name((config.service or "").lower()) + self._in_aws_lambda = in_aws_lambda() def process_trace(self, trace): - if not trace: - return + # AWS Lambda spans receive unhelpful base_service value of runtime + # Remove base_service to prevent service overrides in Lambda spans + if not trace or self._in_aws_lambda: + return trace traces_to_process = filter( lambda x: x.service and x.service.lower() != self._global_service, diff --git a/releasenotes/notes/fix-serverless-service-representation-b1f81fe0553a7a45.yaml b/releasenotes/notes/fix-serverless-service-representation-b1f81fe0553a7a45.yaml new file mode 100644 index 00000000000..0535ee9e8cd --- /dev/null +++ b/releasenotes/notes/fix-serverless-service-representation-b1f81fe0553a7a45.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + aws: Set peer.service explictly to improve the accuracy of serverless + service representation. Base_service defaults to unhelpful value "runtime" + in serverless spans. Remove base_service to prevent unwanted service + overrides in Lambda spans. diff --git a/tests/contrib/aiobotocore/test.py b/tests/contrib/aiobotocore/test.py index d21e3fa52e6..72c36461a41 100644 --- a/tests/contrib/aiobotocore/test.py +++ b/tests/contrib/aiobotocore/test.py @@ -553,3 +553,102 @@ async def test_response_context_manager(tracer): assert span.name == "s3.command" assert span.get_tag("component") == "aiobotocore" assert span.get_tag("span.kind") == "client" + + +# Peer service tests +@pytest.mark.asyncio +async def test_sqs_client_peer_service_in_lambda(tracer, monkeypatch): + """Test that peer.service tag is set for SQS when running in AWS Lambda""" + monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "my-func") + + async with aiobotocore_client("sqs", tracer) as sqs: + await sqs.list_queues() + + traces = tracer.pop_traces() + assert len(traces) == 1 + assert len(traces[0]) == 1 + span = traces[0][0] + # Should have peer.service set to sqs hostname + assert span.get_tag("peer.service") == "sqs.us-west-2.amazonaws.com" + + +@pytest.mark.asyncio +async def test_s3_client_peer_service_in_lambda(tracer, monkeypatch): + """Test that peer.service tag is set for S3 when running in AWS Lambda""" + monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "my-func") + bucket_name = f"{time.time()}bucket".replace(".", "") + + async with aiobotocore_client("s3", tracer) as s3: + # Test with bucket parameter + await s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={"LocationConstraint": "us-west-2"}) + + traces = tracer.pop_traces() + assert len(traces) == 1 + assert len(traces[0]) == 1 + span = traces[0][0] + # Should have peer.service set to bucket-specific hostname + assert span.get_tag("peer.service") == f"{bucket_name}.s3.us-west-2.amazonaws.com" + + +@pytest.mark.asyncio +async def test_dynamodb_client_peer_service_in_lambda(tracer, monkeypatch): + """Test that peer.service tag is set for DynamoDB when running in AWS Lambda""" + monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "my-func") + + async with aiobotocore_client("dynamodb", tracer) as dynamodb: + await dynamodb.list_tables() + + traces = tracer.pop_traces() + assert len(traces) == 1 + assert len(traces[0]) == 1 + span = traces[0][0] + # Should have peer.service set to dynamodb hostname + assert span.get_tag("peer.service") == "dynamodb.us-west-2.amazonaws.com" + + +@pytest.mark.asyncio +async def test_kinesis_client_peer_service_in_lambda(tracer, monkeypatch): + """Test that peer.service tag is set for Kinesis when running in AWS Lambda""" + monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "my-func") + + async with aiobotocore_client("kinesis", tracer) as kinesis: + await kinesis.list_streams() + + traces = tracer.pop_traces() + assert len(traces) == 1 + assert len(traces[0]) == 1 + span = traces[0][0] + # Should have peer.service set to kinesis hostname + assert span.get_tag("peer.service") == "kinesis.us-west-2.amazonaws.com" + + +@pytest.mark.asyncio +async def test_sns_client_peer_service_in_lambda(tracer, monkeypatch): + """Test that peer.service tag is set for SNS when running in AWS Lambda""" + monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "my-func") + + async with aiobotocore_client("sns", tracer) as sns: + await sns.list_topics() + + traces = tracer.pop_traces() + assert len(traces) == 1 + assert len(traces[0]) == 1 + span = traces[0][0] + # Should have peer.service set to sns hostname + assert span.get_tag("peer.service") == "sns.us-west-2.amazonaws.com" + + +@pytest.mark.asyncio +async def test_eventbridge_client_peer_service_in_lambda(tracer, monkeypatch): + """Test that peer.service tag is set for EventBridge when running in AWS Lambda""" + monkeypatch.setenv("AWS_LAMBDA_FUNCTION_NAME", "my-func") + + async with aiobotocore_client("events", tracer) as events: + await events.list_rules() + + traces = tracer.pop_traces() + assert len(traces) == 1 + assert len(traces[0]) == 1 + span = traces[0][0] + # Should have peer.service set to events hostname + assert span.get_tag("peer.service") == "events.us-west-2.amazonaws.com" diff --git a/tests/contrib/botocore/test.py b/tests/contrib/botocore/test.py index 2e1bd4e1e8d..74271415b7b 100644 --- a/tests/contrib/botocore/test.py +++ b/tests/contrib/botocore/test.py @@ -4247,3 +4247,107 @@ def test_aws_payload_tagging_kinesis(self): with self.tracer.trace("kinesis.manual_span"): client.create_stream(StreamName=stream_name, ShardCount=1) client.put_records(StreamName=stream_name, Records=data) + + # Peer service tests + @mock_sqs + @TracerTestCase.run_in_subprocess(env_overrides=dict(AWS_LAMBDA_FUNCTION_NAME="my-func")) + def test_sqs_client_peer_service_in_lambda(self): + """Test that peer.service tag is set for SQS when running in AWS Lambda""" + sqs = self.session.create_client("sqs", region_name="us-east-1") + pin = Pin(service=self.TEST_SERVICE) + pin._tracer = self.tracer + pin.onto(sqs) + + sqs.list_queues() + spans = self.get_spans() + assert spans + assert len(spans) == 1 + span = spans[0] + # Should have peer.service set to sqs hostname + assert span.get_tag("peer.service") == "sqs.us-east-1.amazonaws.com" + + @mock_s3 + @TracerTestCase.run_in_subprocess(env_overrides=dict(AWS_LAMBDA_FUNCTION_NAME="my-func")) + def test_s3_client_peer_service_in_lambda(self): + """Test that peer.service tag is set for S3 when running in AWS Lambda""" + s3 = self.session.create_client("s3", region_name="us-east-1") + pin = Pin(service=self.TEST_SERVICE) + pin._tracer = self.tracer + pin.onto(s3) + + # Test with bucket parameter + s3.create_bucket(Bucket="test-bucket") + spans = self.get_spans() + assert spans + assert len(spans) == 1 + span = spans[0] + # Should have peer.service set to bucket-specific hostname + assert span.get_tag("peer.service") == "test-bucket.s3.us-east-1.amazonaws.com" + + @mock_dynamodb + @TracerTestCase.run_in_subprocess(env_overrides=dict(AWS_LAMBDA_FUNCTION_NAME="my-func")) + def test_dynamodb_client_peer_service_in_lambda(self): + """Test that peer.service tag is set for DynamoDB when running in AWS Lambda""" + dynamodb = self.session.create_client("dynamodb", region_name="us-west-2") + pin = Pin(service=self.TEST_SERVICE) + pin._tracer = self.tracer + pin.onto(dynamodb) + + dynamodb.list_tables() + spans = self.get_spans() + assert spans + assert len(spans) == 1 + span = spans[0] + # Should have peer.service set to dynamodb hostname + assert span.get_tag("peer.service") == "dynamodb.us-west-2.amazonaws.com" + + @mock_kinesis + @TracerTestCase.run_in_subprocess(env_overrides=dict(AWS_LAMBDA_FUNCTION_NAME="my-func")) + def test_kinesis_client_peer_service_in_lambda(self): + """Test that peer.service tag is set for Kinesis when running in AWS Lambda""" + kinesis = self.session.create_client("kinesis", region_name="us-east-1") + pin = Pin(service=self.TEST_SERVICE) + pin._tracer = self.tracer + pin.onto(kinesis) + + kinesis.list_streams() + spans = self.get_spans() + assert spans + assert len(spans) == 1 + span = spans[0] + # Should have peer.service set to kinesis hostname + assert span.get_tag("peer.service") == "kinesis.us-east-1.amazonaws.com" + + @mock_sns + @TracerTestCase.run_in_subprocess(env_overrides=dict(AWS_LAMBDA_FUNCTION_NAME="my-func")) + def test_sns_client_peer_service_in_lambda(self): + """Test that peer.service tag is set for SNS when running in AWS Lambda""" + sns = self.session.create_client("sns", region_name="us-west-2") + pin = Pin(service=self.TEST_SERVICE) + pin._tracer = self.tracer + pin.onto(sns) + + sns.list_topics() + spans = self.get_spans() + assert spans + assert len(spans) == 1 + span = spans[0] + # Should have peer.service set to sns hostname + assert span.get_tag("peer.service") == "sns.us-west-2.amazonaws.com" + + @mock_events + @TracerTestCase.run_in_subprocess(env_overrides=dict(AWS_LAMBDA_FUNCTION_NAME="my-func")) + def test_eventbridge_client_peer_service_in_lambda(self): + """Test that peer.service tag is set for EventBridge when running in AWS Lambda""" + events = self.session.create_client("events", region_name="us-east-1") + pin = Pin(service=self.TEST_SERVICE) + pin._tracer = self.tracer + pin.onto(events) + + events.list_rules() + spans = self.get_spans() + assert spans + assert len(spans) == 1 + span = spans[0] + # Should have peer.service set to events hostname + assert span.get_tag("peer.service") == "events.us-east-1.amazonaws.com"