Skip to content

[do not merge] Remove databag trimming #4645

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: potel-base
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion MIGRATION_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ Looking to upgrade from Sentry SDK 2.x to 3.x? Here's a comprehensive list of wh
- The `Span()` constructor does not accept a `hub` parameter anymore.
- The `sentry_sdk.Scope()` constructor no longer accepts a `client` parameter.
- `Span.finish()` does not accept a `hub` parameter anymore.
- `Span.finish()` no longer returns the `event_id` if the event is sent to sentry.
- `Span.finish()` no longer returns the `event_id` if the event is sent to Sentry.
- The SDK now trims events far less than before: databags are no longer limited in depth or breadth. Note that your events might still be subject to server-side trimming.
- The default value of `max_request_body_size` was changed to `"always"`, so request bodies will now be included in events by default, regardless of size.
- The `hint` parameter in custom repr processors no longer contains a `remaining_depth` key.
- The `Profile()` constructor does not accept a `hub` parameter anymore.
- A `Profile` object does not have a `.hub` property anymore.
- `MAX_PROFILE_DURATION_NS`, `PROFILE_MINIMUM_SAMPLES`, `Profile`, `Scheduler`, `ThreadScheduler`, `GeventScheduler`, `has_profiling_enabled`, `setup_profiler`, `teardown_profiler` are no longer accessible from `sentry_sdk.profiler`. They're still accessible from `sentry_sdk.profiler.transaction_profiler`.
Expand Down
1 change: 0 additions & 1 deletion sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,6 @@ def _prepare_event(
if event is not None:
event: Event = serialize( # type: ignore[no-redef]
event,
max_request_body_size=self.options.get("max_request_body_size"),
max_value_length=self.options.get("max_value_length"),
custom_repr=self.options.get("custom_repr"),
)
Expand Down
2 changes: 1 addition & 1 deletion sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ def __init__(
http_proxy: Optional[str] = None,
https_proxy: Optional[str] = None,
ignore_errors: Sequence[Union[type, str]] = [], # noqa: B006
max_request_body_size: str = "medium",
max_request_body_size: str = "always",
socket_options: Optional[List[Tuple[int, int, int | bytes]]] = None,
keep_alive: Optional[bool] = None,
before_send: Optional[EventProcessor] = None,
Expand Down
5 changes: 1 addition & 4 deletions sentry_sdk/integrations/pure_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,5 @@ def start(n: ast.expr) -> Tuple[int, int]:
atok = source.asttokens()

expressions.sort(key=closeness, reverse=True)
vars = {
atok.get_text(nodes[0]): value
for nodes, value in expressions[: serializer.MAX_DATABAG_BREADTH]
}
vars = {atok.get_text(nodes[0]): value for nodes, value in expressions}
return serializer.serialize(vars, is_vars=True)
58 changes: 1 addition & 57 deletions sentry_sdk/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,6 @@
# Bytes are technically not strings in Python 3, but we can serialize them
serializable_str_types = (str, bytes, bytearray, memoryview)


# Maximum depth and breadth of databags. Excess data will be trimmed. If
# max_request_body_size is "always", request bodies won't be trimmed.
MAX_DATABAG_DEPTH = 5
MAX_DATABAG_BREADTH = 10
CYCLE_MARKER = "<cyclic>"


Expand Down Expand Up @@ -99,11 +94,9 @@ def serialize(event: Union[Dict[str, Any], Event], **kwargs: Any) -> Dict[str, A
The algorithm itself is a recursive graph walk down the data structures it encounters.

It has the following responsibilities:
* Trimming databags and keeping them within MAX_DATABAG_BREADTH and MAX_DATABAG_DEPTH.
* Calling safe_repr() on objects appropriately to keep them informative and readable in the final payload.
* Annotating the payload with the _meta field whenever trimming happens.

:param max_request_body_size: If set to "always", will never trim request bodies.
:param max_value_length: The max length to strip strings to, defaults to sentry_sdk.consts.DEFAULT_MAX_VALUE_LENGTH
:param is_vars: If we're serializing vars early, we want to repr() things that are JSON-serializable to make their type more apparent. For example, it's useful to see the difference between a unicode-string and a bytestring when viewing a stacktrace.
:param custom_repr: A custom repr function that runs before safe_repr on the object to be serialized. If it returns None or throws internally, we will fall back to safe_repr.
Expand All @@ -113,7 +106,6 @@ def serialize(event: Union[Dict[str, Any], Event], **kwargs: Any) -> Dict[str, A
path: List[Segment] = []
meta_stack: List[Dict[str, Any]] = []

keep_request_bodies: bool = kwargs.pop("max_request_body_size", None) == "always"
max_value_length: Optional[int] = kwargs.pop("max_value_length", None)
is_vars = kwargs.pop("is_vars", False)
custom_repr: Callable[..., Optional[str]] = kwargs.pop("custom_repr", None)
Expand Down Expand Up @@ -179,11 +171,8 @@ def _is_request_body() -> Optional[bool]:
def _serialize_node(
obj: Any,
is_databag: Optional[bool] = None,
is_request_body: Optional[bool] = None,
should_repr_strings: Optional[bool] = None,
segment: Optional[Segment] = None,
remaining_breadth: Optional[Union[int, float]] = None,
remaining_depth: Optional[Union[int, float]] = None,
) -> Any:
if segment is not None:
path.append(segment)
Expand All @@ -196,10 +185,7 @@ def _serialize_node(
return _serialize_node_impl(
obj,
is_databag=is_databag,
is_request_body=is_request_body,
should_repr_strings=should_repr_strings,
remaining_depth=remaining_depth,
remaining_breadth=remaining_breadth,
)
except BaseException:
capture_internal_exception(sys.exc_info())
Expand All @@ -222,10 +208,7 @@ def _flatten_annotated(obj: Any) -> Any:
def _serialize_node_impl(
obj: Any,
is_databag: Optional[bool],
is_request_body: Optional[bool],
should_repr_strings: Optional[bool],
remaining_depth: Optional[Union[float, int]],
remaining_breadth: Optional[Union[float, int]],
) -> Any:
if isinstance(obj, AnnotatedValue):
should_repr_strings = False
Expand All @@ -235,31 +218,10 @@ def _serialize_node_impl(
if is_databag is None:
is_databag = _is_databag()

if is_request_body is None:
is_request_body = _is_request_body()

if is_databag:
if is_request_body and keep_request_bodies:
remaining_depth = float("inf")
remaining_breadth = float("inf")
else:
if remaining_depth is None:
remaining_depth = MAX_DATABAG_DEPTH
if remaining_breadth is None:
remaining_breadth = MAX_DATABAG_BREADTH

obj = _flatten_annotated(obj)

if remaining_depth is not None and remaining_depth <= 0:
_annotate(rem=[["!limit", "x"]])
if is_databag:
return _flatten_annotated(
strip_string(_safe_repr_wrapper(obj), max_length=max_value_length)
)
return None

if is_databag and global_repr_processors:
hints = {"memo": memo, "remaining_depth": remaining_depth}
hints = {"memo": memo}
for processor in global_repr_processors:
result = processor(obj, hints)
if result is not NotImplemented:
Expand Down Expand Up @@ -294,21 +256,12 @@ def _serialize_node_impl(
i = 0

for k, v in obj.items():
if remaining_breadth is not None and i >= remaining_breadth:
_annotate(len=len(obj))
break

str_k = str(k)
v = _serialize_node(
v,
segment=str_k,
should_repr_strings=should_repr_strings,
is_databag=is_databag,
is_request_body=is_request_body,
remaining_depth=(
remaining_depth - 1 if remaining_depth is not None else None
),
remaining_breadth=remaining_breadth,
)
rv_dict[str_k] = v
i += 1
Expand All @@ -321,21 +274,12 @@ def _serialize_node_impl(
rv_list = []

for i, v in enumerate(obj):
if remaining_breadth is not None and i >= remaining_breadth:
_annotate(len=len(obj))
break

rv_list.append(
_serialize_node(
v,
segment=i,
should_repr_strings=should_repr_strings,
is_databag=is_databag,
is_request_body=is_request_body,
remaining_depth=(
remaining_depth - 1 if remaining_depth is not None else None
),
remaining_breadth=remaining_breadth,
)
)

Expand Down
13 changes: 5 additions & 8 deletions tests/integrations/bottle/test_bottle.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from sentry_sdk import capture_message
from sentry_sdk.consts import DEFAULT_MAX_VALUE_LENGTH
from sentry_sdk.integrations.bottle import BottleIntegration
from sentry_sdk.serializer import MAX_DATABAG_BREADTH

from sentry_sdk.integrations.logging import LoggingIntegration
from werkzeug.test import Client
Expand Down Expand Up @@ -122,7 +121,7 @@ def index():


def test_large_json_request(sentry_init, capture_events, app, get_client):
sentry_init(integrations=[BottleIntegration()], max_request_body_size="always")
sentry_init(integrations=[BottleIntegration()])

data = {"foo": {"bar": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}}

Expand Down Expand Up @@ -180,7 +179,7 @@ def index():


def test_medium_formdata_request(sentry_init, capture_events, app, get_client):
sentry_init(integrations=[BottleIntegration()], max_request_body_size="always")
sentry_init(integrations=[BottleIntegration()])

data = {"foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}

Expand Down Expand Up @@ -242,7 +241,7 @@ def index():


def test_files_and_form(sentry_init, capture_events, app, get_client):
sentry_init(integrations=[BottleIntegration()], max_request_body_size="always")
sentry_init(integrations=[BottleIntegration()])

data = {
"foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10),
Expand Down Expand Up @@ -287,11 +286,9 @@ def index():
def test_json_not_truncated_if_max_request_body_size_is_always(
sentry_init, capture_events, app, get_client
):
sentry_init(integrations=[BottleIntegration()], max_request_body_size="always")
sentry_init(integrations=[BottleIntegration()])

data = {
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
}
data = {"key{}".format(i): "value{}".format(i) for i in range(1000)}

@app.route("/", method="POST")
def index():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ def test_clickhouse_dbapi_breadcrumbs_with_pii(sentry_init, capture_events) -> N
"server.address": "localhost",
"server.port": 9000,
"db.params": {"minv": 150},
"db.result": [[["370"]], [["'sum(x)'", "'Int64'"]]],
"db.result": [[[370]], [["sum(x)", "Int64"]]],
},
"message": "SELECT sum(x) FROM test WHERE x > 150",
"type": "default",
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/falcon/test_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def on_get(self, req, resp):


def test_falcon_large_json_request(sentry_init, capture_events):
sentry_init(integrations=[FalconIntegration()], max_request_body_size="always")
sentry_init(integrations=[FalconIntegration()])

data = {"foo": {"bar": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}}

Expand Down
21 changes: 5 additions & 16 deletions tests/integrations/flask/test_flask.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
)
from sentry_sdk.consts import DEFAULT_MAX_VALUE_LENGTH
from sentry_sdk.integrations.logging import LoggingIntegration
from sentry_sdk.serializer import MAX_DATABAG_BREADTH


login_manager = LoginManager()
Expand Down Expand Up @@ -249,9 +248,7 @@ def login():


def test_flask_large_json_request(sentry_init, capture_events, app):
sentry_init(
integrations=[flask_sentry.FlaskIntegration()], max_request_body_size="always"
)
sentry_init(integrations=[flask_sentry.FlaskIntegration()])

data = {"foo": {"bar": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}}

Expand Down Expand Up @@ -344,9 +341,7 @@ def index():


def test_flask_medium_formdata_request(sentry_init, capture_events, app):
sentry_init(
integrations=[flask_sentry.FlaskIntegration()], max_request_body_size="always"
)
sentry_init(integrations=[flask_sentry.FlaskIntegration()])

data = {"foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}

Expand Down Expand Up @@ -452,9 +447,7 @@ def index():


def test_flask_files_and_form(sentry_init, capture_events, app):
sentry_init(
integrations=[flask_sentry.FlaskIntegration()], max_request_body_size="always"
)
sentry_init(integrations=[flask_sentry.FlaskIntegration()])

data = {
"foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10),
Expand Down Expand Up @@ -497,13 +490,9 @@ def index():
def test_json_not_truncated_if_max_request_body_size_is_always(
sentry_init, capture_events, app
):
sentry_init(
integrations=[flask_sentry.FlaskIntegration()], max_request_body_size="always"
)
sentry_init(integrations=[flask_sentry.FlaskIntegration()])

data = {
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
}
data = {"key{}".format(i): "value{}".format(i) for i in range(10**5)}

@app.route("/", methods=["POST"])
def index():
Expand Down
12 changes: 10 additions & 2 deletions tests/integrations/pure_eval/test_pure_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from sentry_sdk import capture_exception, serializer
from sentry_sdk import capture_exception
from sentry_sdk.integrations.pure_eval import PureEvalIntegration


Expand Down Expand Up @@ -62,6 +62,14 @@ def foo():
"i",
"u",
"y",
"t",
"r",
"e",
"w",
"q",
"(q, w, e, r, t, y, u, i, o, p, a, s)",
"str((q, w, e, r, t, y, u, i, o, p, a, s))",
"events",
]
assert list(frame_vars.keys()) == expected_keys
assert frame_vars["namespace.d"] == {"1": "2"}
Expand All @@ -85,4 +93,4 @@ def foo():
"s",
"events",
}
assert len(frame_vars) == serializer.MAX_DATABAG_BREADTH
assert len(frame_vars) == 14
11 changes: 4 additions & 7 deletions tests/integrations/pyramid/test_pyramid.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from sentry_sdk import capture_message, add_breadcrumb
from sentry_sdk.consts import DEFAULT_MAX_VALUE_LENGTH
from sentry_sdk.integrations.pyramid import PyramidIntegration
from sentry_sdk.serializer import MAX_DATABAG_BREADTH
from tests.conftest import unpack_werkzeug_response


Expand Down Expand Up @@ -157,7 +156,7 @@ def test_transaction_style(


def test_large_json_request(sentry_init, capture_events, route, get_client):
sentry_init(integrations=[PyramidIntegration()], max_request_body_size="always")
sentry_init(integrations=[PyramidIntegration()])

data = {"foo": {"bar": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}}

Expand Down Expand Up @@ -211,11 +210,9 @@ def index(request):
def test_json_not_truncated_if_max_request_body_size_is_always(
sentry_init, capture_events, route, get_client
):
sentry_init(integrations=[PyramidIntegration()], max_request_body_size="always")
sentry_init(integrations=[PyramidIntegration()])

data = {
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
}
data = {"key{}".format(i): "value{}".format(i) for i in range(10**5)}

@route("/")
def index(request):
Expand All @@ -234,7 +231,7 @@ def index(request):


def test_files_and_form(sentry_init, capture_events, route, get_client):
sentry_init(integrations=[PyramidIntegration()], max_request_body_size="always")
sentry_init(integrations=[PyramidIntegration()])

data = {
"foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10),
Expand Down
Loading
Loading