
Commit 7fb968d

chore(llmobs): remove llm io from bedrock spans (#14057)
Remove potentially sensitive I/O data from APM spans. This way, prompt and completion data will only appear on the LLM Obs spans, which are/will be subject to data access controls. Mostly, this just removes I/O tag sets. A few things (mostly metrics) have LLM Obs tags that depend on span tags, so there is a bit more refactoring there. Let me know if I removed anything that should really stay, or if I missed something that should be restricted.

## Checklist

- [x] PR author has checked that all the criteria below are met
  - The PR description includes an overview of the change
  - The PR description articulates the motivation for the change
  - The change includes tests OR the PR description describes a testing strategy
  - The PR description notes risks associated with the change, if any
  - Newly-added code is easy to change
  - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
  - The change includes or references documentation updates if necessary
  - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist

- [x] Reviewer has checked that all the criteria below are met
  - Title is accurate
  - All changes are related to the pull request's stated goal
  - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
  - Testing strategy adequately addresses listed risks
  - Newly-added code is easy to change
  - Release note makes sense to a user of the library
  - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
  - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
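For concreteness, an illustrative before/after view of the APM span's Bedrock tags. The tag names come from the snapshot diffs below; the dicts and values are hypothetical sketches, not ddtrace code:

```python
# Illustrative only: tag placement before vs. after this change.

apm_span_tags_before = {
    "bedrock.request.model": "j2-mid-v1",
    "bedrock.request.model_provider": "ai21",
    "bedrock.request.prompt": "Explain like I'm a five-year old: ...",  # sensitive I/O
    "bedrock.response.choices.0.text": "\nA neural network is like ...",  # sensitive I/O
    "bedrock.response.usage.prompt_tokens": 10,
    "bedrock.response.usage.completion_tokens": 10,
}

apm_span_tags_after = {
    "bedrock.request.model": "j2-mid-v1",
    "bedrock.request.model_provider": "ai21",
    # Prompt/completion text and token usage now appear only on the LLM Obs
    # span, which is subject to data access controls.
}
```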
1 parent a3e1e7e commit 7fb968d

File tree: 27 files changed, +163 -442 lines changed

ddtrace/_trace/trace_handlers.py

Lines changed: 9 additions & 49 deletions
```diff
@@ -661,13 +661,9 @@ def _on_botocore_patched_bedrock_api_call_started(ctx, request_params):
 
     span.set_tag_str("bedrock.request.model_provider", ctx["model_provider"])
     span.set_tag_str("bedrock.request.model", ctx["model_name"])
-    for k, v in request_params.items():
-        if k == "prompt":
-            if integration.is_pc_sampled_span(span):
-                v = integration.trunc(str(v))
-        span.set_tag_str("bedrock.request.{}".format(k), str(v))
-        if k == "n":
-            ctx.set_item("num_generations", str(v))
+
+    if "n" in request_params:
+        ctx.set_item("num_generations", str(request_params["n"]))
 
 
 def _on_botocore_patched_bedrock_api_call_exception(ctx, exc_info):
@@ -680,16 +676,6 @@ def _on_botocore_patched_bedrock_api_call_exception(ctx, exc_info):
     span.finish()
 
 
-def _on_botocore_patched_bedrock_api_call_success(ctx, reqid, latency, input_token_count, output_token_count):
-    span = ctx.span
-    span.set_tag_str("bedrock.response.id", reqid)
-    span.set_tag_str("bedrock.response.duration", latency)
-    if input_token_count:
-        span.set_metric("bedrock.response.usage.prompt_tokens", int(input_token_count))
-    if output_token_count:
-        span.set_metric("bedrock.response.usage.completion_tokens", int(output_token_count))
-
-
 def _propagate_context(ctx, headers):
     distributed_tracing_enabled = ctx["integration_config"].distributed_tracing_enabled
     span = ctx.span
@@ -731,38 +717,13 @@ def _on_botocore_bedrock_process_response_converse(
 def _on_botocore_bedrock_process_response(
     ctx: core.ExecutionContext,
     formatted_response: Dict[str, Any],
-    metadata: Dict[str, Any],
-    body: Dict[str, List[Dict]],
-    should_set_choice_ids: bool,
 ) -> None:
-    text = formatted_response["text"]
-    span = ctx.span
-    model_name = ctx["model_name"]
-    if should_set_choice_ids:
-        for i in range(len(text)):
-            span.set_tag_str("bedrock.response.choices.{}.id".format(i), str(body["generations"][i]["id"]))
-    integration = ctx["bedrock_integration"]
-    if metadata is not None:
-        for k, v in metadata.items():
-            if k in ["usage.completion_tokens", "usage.prompt_tokens"] and v:
-                span.set_metric("bedrock.response.{}".format(k), int(v))
-            else:
-                span.set_tag_str("bedrock.{}".format(k), str(v))
-    if "embed" in model_name:
-        span.set_metric("bedrock.response.embedding_length", len(formatted_response["text"][0]))
-        span.finish()
-        return
-    for i in range(len(formatted_response["text"])):
-        if integration.is_pc_sampled_span(span):
-            span.set_tag_str(
-                "bedrock.response.choices.{}.text".format(i),
-                integration.trunc(str(formatted_response["text"][i])),
-            )
-        span.set_tag_str(
-            "bedrock.response.choices.{}.finish_reason".format(i), str(formatted_response["finish_reason"][i])
-        )
-    integration.llmobs_set_tags(span, args=[ctx], kwargs={}, response=formatted_response)
-    span.finish()
+    with ctx.span as span:
+        model_name = ctx["model_name"]
+        integration = ctx["bedrock_integration"]
+        if "embed" in model_name:
+            return
+        integration.llmobs_set_tags(span, args=[ctx], kwargs={}, response=formatted_response)
 
 
 def _on_botocore_sqs_recvmessage_post(
@@ -931,7 +892,6 @@ def listen():
     core.on("botocore.client_context.update_messages", _on_botocore_update_messages)
     core.on("botocore.patched_bedrock_api_call.started", _on_botocore_patched_bedrock_api_call_started)
    core.on("botocore.patched_bedrock_api_call.exception", _on_botocore_patched_bedrock_api_call_exception)
-    core.on("botocore.patched_bedrock_api_call.success", _on_botocore_patched_bedrock_api_call_success)
     core.on("botocore.bedrock.process_response", _on_botocore_bedrock_process_response)
     core.on("botocore.bedrock.process_response_converse", _on_botocore_bedrock_process_response_converse)
     core.on("botocore.sqs.ReceiveMessage.post", _on_botocore_sqs_recvmessage_post)
```

ddtrace/contrib/internal/botocore/services/bedrock.py

Lines changed: 3 additions & 27 deletions
```diff
@@ -48,12 +48,9 @@ def read(self, amt=None):
             self._body.append(json.loads(body))
             if self.__wrapped__.tell() == int(self.__wrapped__._content_length):
                 formatted_response = _extract_text_and_response_reason(self._execution_ctx, self._body[0])
-                model_provider = self._execution_ctx["model_provider"]
-                model_name = self._execution_ctx["model_name"]
-                should_set_choice_ids = model_provider == _COHERE and "embed" not in model_name
                 core.dispatch(
                     "botocore.bedrock.process_response",
-                    [self._execution_ctx, formatted_response, None, self._body[0], should_set_choice_ids],
+                    [self._execution_ctx, formatted_response],
                 )
             return body
         except Exception:
@@ -67,12 +64,9 @@ def readlines(self):
             for line in lines:
                 self._body.append(json.loads(line))
             formatted_response = _extract_text_and_response_reason(self._execution_ctx, self._body[0])
-            model_provider = self._execution_ctx["model_provider"]
-            model_name = self._execution_ctx["model_name"]
-            should_set_choice_ids = model_provider == _COHERE and "embed" not in model_name
             core.dispatch(
                 "botocore.bedrock.process_response",
-                [self._execution_ctx, formatted_response, None, self._body[0], should_set_choice_ids],
+                [self._execution_ctx, formatted_response],
             )
             return lines
         except Exception:
@@ -93,16 +87,10 @@ def __iter__(self):
         finally:
             if exception_raised:
                 return
-            metadata = _extract_streamed_response_metadata(self._execution_ctx, self._body)
             formatted_response = _extract_streamed_response(self._execution_ctx, self._body)
-            model_provider = self._execution_ctx["model_provider"]
-            model_name = self._execution_ctx["model_name"]
-            should_set_choice_ids = (
-                model_provider == _COHERE and "is_finished" not in self._body[0] and "embed" not in model_name
-            )
             core.dispatch(
                 "botocore.bedrock.process_response",
-                [self._execution_ctx, formatted_response, metadata, self._body, should_set_choice_ids],
+                [self._execution_ctx, formatted_response],
             )
 
 
@@ -443,18 +431,6 @@ def handle_bedrock_response(
             safe_token_count(cache_write_tokens),
         )
 
-    # for both converse & invoke, dispatch success event to store basic metrics
-    core.dispatch(
-        "botocore.patched_bedrock_api_call.success",
-        [
-            ctx,
-            str(metadata.get("RequestId", "")),
-            request_latency,
-            str(input_tokens),
-            str(output_tokens),
-        ],
-    )
-
     if ctx["resource"] == "Converse":
         core.dispatch("botocore.bedrock.process_response_converse", [ctx, result])
     return result
```
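After this change every call site dispatches the same two-element payload: subscribers of `"botocore.bedrock.process_response"` receive only `(ctx, formatted_response)`, with `metadata`, the raw body, and `should_set_choice_ids` gone from the event. A toy dispatcher sketch of the `core.on`/`core.dispatch` pattern (ddtrace's real core module is more involved; everything here is illustrative):

```python
# Toy pub/sub dispatcher illustrating the slimmed-down event payload.

_listeners = {}


def on(event, handler):
    _listeners.setdefault(event, []).append(handler)


def dispatch(event, args):
    for handler in _listeners.get(event, []):
        handler(*args)


def _on_process_response(ctx, formatted_response):
    # The handler now takes only (ctx, formatted_response).
    print(ctx["model_name"], formatted_response["text"])


on("botocore.bedrock.process_response", _on_process_response)
dispatch(
    "botocore.bedrock.process_response",
    [{"model_name": "j2-mid-v1"}, {"text": ["..."], "finish_reason": ["length"]}],
)
```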
Lines changed: 3 additions & 0 deletions

```diff
@@ -0,0 +1,3 @@
+other:
+  - |
+    botocore: Removes I/O tags, including LLM prompts, completions, and token usage, from APM spans for Bedrock LLM requests and responses, which are duplicated in LLM Observability.
```

tests/contrib/botocore/test_bedrock.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -252,4 +252,3 @@ def test_span_finishes_after_generator_exit(bedrock_client, request_vcr, mock_tr
     assert span is not None
     assert span.name == "bedrock-runtime.command"
     assert span.resource == "InvokeModelWithResponseStream"
-    assert span.get_tag("bedrock.response.choices.0.text").startswith("Hobb")
```

tests/contrib/botocore/test_bedrock_llmobs.py

Lines changed: 35 additions & 29 deletions
```diff
@@ -25,41 +25,41 @@
 )
 class TestLLMObsBedrock:
     @staticmethod
-    def expected_llmobs_span_event(span, n_output, message=False):
-        prompt_tokens = span.get_metric("bedrock.response.usage.prompt_tokens")
-        completion_tokens = span.get_metric("bedrock.response.usage.completion_tokens")
-        token_metrics = {}
-        if prompt_tokens is not None:
-            token_metrics["input_tokens"] = prompt_tokens
-        if completion_tokens is not None:
-            token_metrics["output_tokens"] = completion_tokens
-        if prompt_tokens is not None and completion_tokens is not None:
-            token_metrics["total_tokens"] = prompt_tokens + completion_tokens
-
-        if span.get_tag("bedrock.request.temperature"):
-            expected_parameters = {"temperature": float(span.get_tag("bedrock.request.temperature"))}
-        if span.get_tag("bedrock.request.max_tokens"):
-            expected_parameters["max_tokens"] = int(span.get_tag("bedrock.request.max_tokens"))
-
+    def expected_llmobs_span_event(span, n_output, message=False, metadata=None, token_metrics=None):
         expected_input = [{"content": mock.ANY}]
         if message:
             expected_input = [{"content": mock.ANY, "role": "user"}]
-        return _expected_llmobs_llm_span_event(
+
+        # Use empty dicts as defaults for _expected_llmobs_llm_span_event to avoid None issues
+        expected_parameters = metadata if metadata is not None else {}
+        expected_token_metrics = token_metrics if token_metrics is not None else None
+
+        expected_event = _expected_llmobs_llm_span_event(
             span,
             model_name=span.get_tag("bedrock.request.model"),
             model_provider=span.get_tag("bedrock.request.model_provider"),
             input_messages=expected_input,
             output_messages=[{"content": mock.ANY} for _ in range(n_output)],
             metadata=expected_parameters,
-            token_metrics=token_metrics,
+            token_metrics=expected_token_metrics,
             tags={"service": "aws.bedrock-runtime", "ml_app": "<ml-app-name>"},
         )
 
+        # If parameters were not explicitly provided, use mock.ANY to match anything
+        if metadata is None:
+            expected_event["meta"]["metadata"] = mock.ANY
+        if token_metrics is None:
+            expected_event["metrics"] = mock.ANY
+
+        return expected_event
+
     @classmethod
     def _test_llmobs_invoke(cls, provider, bedrock_client, mock_tracer, llmobs_events, cassette_name=None, n_output=1):
         if cassette_name is None:
             cassette_name = "%s_invoke.yaml" % provider
         body = _REQUEST_BODIES[provider]
+        expected_metadata = None
+
         if provider == "cohere":
             body = {
                 "prompt": "\n\nHuman: %s\n\nAssistant: Can you explain what a LLM chain is?",
@@ -71,6 +71,8 @@ def _test_llmobs_invoke(cls, provider, bedrock_client, mock_tracer, llmobs_event
                 "stream": False,
                 "num_generations": n_output,
             }
+            expected_metadata = {"temperature": 0.9, "max_tokens": 10}
+
         with get_request_vcr().use_cassette(cassette_name):
             body, model = json.dumps(body), _MODELS[provider]
             if provider == "anthropic_message":
@@ -82,7 +84,9 @@ def _test_llmobs_invoke(cls, provider, bedrock_client, mock_tracer, llmobs_event
         span = mock_tracer.pop_traces()[0][0]
 
         assert len(llmobs_events) == 1
-        assert llmobs_events[0] == cls.expected_llmobs_span_event(span, n_output, message="message" in provider)
+        assert llmobs_events[0] == cls.expected_llmobs_span_event(
+            span, n_output, message="message" in provider, metadata=expected_metadata
+        )
         LLMObs.disable()
 
     @classmethod
@@ -92,6 +96,8 @@ def _test_llmobs_invoke_stream(
         if cassette_name is None:
             cassette_name = "%s_invoke_stream.yaml" % provider
         body = _REQUEST_BODIES[provider]
+        expected_metadata = None
+
         if provider == "cohere":
             body = {
                 "prompt": "\n\nHuman: %s\n\nAssistant: Can you explain what a LLM chain is?",
@@ -103,6 +109,8 @@ def _test_llmobs_invoke_stream(
                 "stream": True,
                 "num_generations": n_output,
             }
+            expected_metadata = {"temperature": 0.9, "max_tokens": 10}
+
         with get_request_vcr().use_cassette(cassette_name):
             body, model = json.dumps(body), _MODELS[provider]
             response = bedrock_client.invoke_model_with_response_stream(body=body, modelId=model)
@@ -111,7 +119,9 @@ def _test_llmobs_invoke_stream(
         span = mock_tracer.pop_traces()[0][0]
 
         assert len(llmobs_events) == 1
-        assert llmobs_events[0] == cls.expected_llmobs_span_event(span, n_output, message="message" in provider)
+        assert llmobs_events[0] == cls.expected_llmobs_span_event(
+            span, n_output, message="message" in provider, metadata=expected_metadata
+        )
 
     def test_llmobs_ai21_invoke(self, ddtrace_global_config, bedrock_client, mock_tracer, llmobs_events):
         self._test_llmobs_invoke("ai21", bedrock_client, mock_tracer, llmobs_events)
@@ -216,16 +226,15 @@ def test_llmobs_error(self, ddtrace_global_config, bedrock_client, mock_tracer,
             json.loads(response.get("body").read())
         span = mock_tracer.pop_traces()[0][0]
 
+        metadata = mock.ANY
+
         assert len(llmobs_events) == 1
         assert llmobs_events[0] == _expected_llmobs_llm_span_event(
             span,
             model_name=span.get_tag("bedrock.request.model"),
             model_provider=span.get_tag("bedrock.request.model_provider"),
             input_messages=[{"content": mock.ANY}],
-            metadata={
-                "temperature": float(span.get_tag("bedrock.request.temperature")),
-                "max_tokens": int(span.get_tag("bedrock.request.max_tokens")),
-            },
+            metadata=metadata,
             output_messages=[{"content": ""}],
             error=span.get_tag("error.type"),
             error_message=span.get_tag("error.message"),
@@ -647,11 +656,8 @@ def test_llmobs_converse_tool_result_json_non_text_or_json(
 )
 class TestLLMObsBedrockProxy:
     @staticmethod
-    def expected_llmobs_span_event_proxy(span, n_output, message=False):
-        if span.get_tag("bedrock.request.temperature"):
-            expected_parameters = {"temperature": float(span.get_tag("bedrock.request.temperature"))}
-        if span.get_tag("bedrock.request.max_tokens"):
-            expected_parameters["max_tokens"] = int(span.get_tag("bedrock.request.max_tokens"))
+    def expected_llmobs_span_event_proxy(span, n_output, message=False, metadata=None):
+        expected_parameters = metadata if metadata is not None else mock.ANY
         return _expected_llmobs_non_llm_span_event(
             span,
             span_kind="workflow",
```
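The test helpers' new defaults work because `unittest.mock.ANY` compares equal to anything, so an expected event whose `metrics` is `mock.ANY` matches whatever token counts the LLM Obs event carries now that they can no longer be read back off the APM span. A standalone illustration (not from the test suite; the event dicts are made up):

```python
# Why defaulting to mock.ANY keeps these assertions passing.
from unittest import mock

actual_event = {
    "meta": {"metadata": {"temperature": 0.9, "max_tokens": 10}},
    "metrics": {"input_tokens": 10, "output_tokens": 10, "total_tokens": 20},
}

expected_event = {
    "meta": {"metadata": mock.ANY},  # mock.ANY compares equal to any value
    "metrics": mock.ANY,
}

assert actual_event == expected_event
```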

tests/snapshots/tests.contrib.botocore.test_bedrock.test_ai21_invoke.json

Lines changed: 5 additions & 16 deletions
```diff
@@ -11,29 +11,18 @@
     "meta": {
       "_dd.base_service": "tests.contrib.botocore",
       "_dd.p.dm": "-0",
-      "_dd.p.tid": "659c346700000000",
-      "bedrock.request.max_tokens": "10",
+      "_dd.p.tid": "687f905d00000000",
       "bedrock.request.model": "j2-mid-v1",
       "bedrock.request.model_provider": "ai21",
-      "bedrock.request.prompt": "Explain like I'm a five-year old: what is a neural network?",
-      "bedrock.request.stop_sequences": "[]",
-      "bedrock.request.temperature": "0.9",
-      "bedrock.request.top_p": "1.0",
-      "bedrock.response.choices.0.finish_reason": "{'reason': 'length', 'length': 10}",
-      "bedrock.response.choices.0.text": "\\nA neural network is like a secret recipe that a computer uses to learn how to",
-      "bedrock.response.duration": "319",
-      "bedrock.response.id": "1de3312e-48d1-4d7f-8694-733c1c1ea20f",
       "language": "python",
-      "runtime-id": "3dd17f1c810946349e47a84acb56402a"
+      "runtime-id": "f9bd434e80c74a2e87a2796368dac486"
     },
     "metrics": {
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
-      "bedrock.response.usage.completion_tokens": 10,
-      "bedrock.response.usage.prompt_tokens": 10,
-      "process_id": 7458
+      "process_id": 25551
     },
-    "duration": 2112000,
-    "start": 1704735847029810000
+    "duration": 2443000,
+    "start": 1753190493493215000
 }]]
```

tests/snapshots/tests.contrib.botocore.test_bedrock.test_amazon_embedding.json

Lines changed: 5 additions & 10 deletions
```diff
@@ -11,23 +11,18 @@
     "meta": {
       "_dd.base_service": "tests.contrib.botocore",
       "_dd.p.dm": "-0",
-      "_dd.p.tid": "662820e400000000",
+      "_dd.p.tid": "687f909300000000",
       "bedrock.request.model": "titan-embed-text-v1",
       "bedrock.request.model_provider": "amazon",
-      "bedrock.request.prompt": "Hello World!",
-      "bedrock.response.duration": "311",
-      "bedrock.response.id": "1fd884e0-c9e8-44fa-b736-d31e2f607d54",
       "language": "python",
-      "runtime-id": "a7bb6456241740dea419398d37aa13d2"
+      "runtime-id": "f9bd434e80c74a2e87a2796368dac486"
     },
     "metrics": {
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": 1,
-      "bedrock.response.embedding_length": 1536,
-      "bedrock.response.usage.prompt_tokens": 3,
-      "process_id": 60939
+      "process_id": 25551
     },
-    "duration": 6739000,
-    "start": 1713905892539987000
+    "duration": 2984000,
+    "start": 1753190547196841000
 }]]
```
