diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md index 0cc375b4bd..cb96c87ac2 100644 --- a/MIGRATION_GUIDE.md +++ b/MIGRATION_GUIDE.md @@ -19,7 +19,10 @@ Looking to upgrade from Sentry SDK 2.x to 3.x? Here's a comprehensive list of wh - The `Span()` constructor does not accept a `hub` parameter anymore. - The `sentry_sdk.Scope()` constructor no longer accepts a `client` parameter. - `Span.finish()` does not accept a `hub` parameter anymore. -- `Span.finish()` no longer returns the `event_id` if the event is sent to sentry. +- `Span.finish()` no longer returns the `event_id` if the event is sent to Sentry. +- The SDK no longer limits the depth or breadth of databags when serializing events: the `MAX_DATABAG_DEPTH` and `MAX_DATABAG_BREADTH` constants were removed from `sentry_sdk.serializer`. String values are still truncated to `max_value_length`. Note that your events might still be subject to server-side trimming. +- The default value of `max_request_body_size` was changed to `"always"`, so request bodies will now be included in events by default, regardless of size. +- The `hint` parameter in custom repr processors no longer contains a `remaining_depth` key. - The `Profile()` constructor does not accept a `hub` parameter anymore. - A `Profile` object does not have a `.hub` property anymore. - `MAX_PROFILE_DURATION_NS`, `PROFILE_MINIMUM_SAMPLES`, `Profile`, `Scheduler`, `ThreadScheduler`, `GeventScheduler`, `has_profiling_enabled`, `setup_profiler`, `teardown_profiler` are no longer accessible from `sentry_sdk.profiler`. They're still accessible from `sentry_sdk.profiler.transaction_profiler`. 
diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 98553d8993..f07f84dc63 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -547,7 +547,6 @@ def _prepare_event( if event is not None: event: Event = serialize( # type: ignore[no-redef] event, - max_request_body_size=self.options.get("max_request_body_size"), max_value_length=self.options.get("max_value_length"), custom_repr=self.options.get("custom_repr"), ) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index b8b0a46d44..13f3c82b62 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -812,7 +812,7 @@ def __init__( http_proxy: Optional[str] = None, https_proxy: Optional[str] = None, ignore_errors: Sequence[Union[type, str]] = [], # noqa: B006 - max_request_body_size: str = "medium", + max_request_body_size: str = "always", socket_options: Optional[List[Tuple[int, int, int | bytes]]] = None, keep_alive: Optional[bool] = None, before_send: Optional[EventProcessor] = None, diff --git a/sentry_sdk/integrations/pure_eval.py b/sentry_sdk/integrations/pure_eval.py index 74cfa5a7c6..4084622dce 100644 --- a/sentry_sdk/integrations/pure_eval.py +++ b/sentry_sdk/integrations/pure_eval.py @@ -128,8 +128,5 @@ def start(n: ast.expr) -> Tuple[int, int]: atok = source.asttokens() expressions.sort(key=closeness, reverse=True) - vars = { - atok.get_text(nodes[0]): value - for nodes, value in expressions[: serializer.MAX_DATABAG_BREADTH] - } + vars = {atok.get_text(nodes[0]): value for nodes, value in expressions} return serializer.serialize(vars, is_vars=True) diff --git a/sentry_sdk/serializer.py b/sentry_sdk/serializer.py index a4a54e757a..5d7a20fd9a 100644 --- a/sentry_sdk/serializer.py +++ b/sentry_sdk/serializer.py @@ -38,11 +38,6 @@ # Bytes are technically not strings in Python 3, but we can serialize them serializable_str_types = (str, bytes, bytearray, memoryview) - -# Maximum depth and breadth of databags. Excess data will be trimmed. 
If -# max_request_body_size is "always", request bodies won't be trimmed. -MAX_DATABAG_DEPTH = 5 -MAX_DATABAG_BREADTH = 10 CYCLE_MARKER = "" @@ -99,11 +94,9 @@ def serialize(event: Union[Dict[str, Any], Event], **kwargs: Any) -> Dict[str, A The algorithm itself is a recursive graph walk down the data structures it encounters. It has the following responsibilities: - * Trimming databags and keeping them within MAX_DATABAG_BREADTH and MAX_DATABAG_DEPTH. * Calling safe_repr() on objects appropriately to keep them informative and readable in the final payload. * Annotating the payload with the _meta field whenever trimming happens. - :param max_request_body_size: If set to "always", will never trim request bodies. :param max_value_length: The max length to strip strings to, defaults to sentry_sdk.consts.DEFAULT_MAX_VALUE_LENGTH :param is_vars: If we're serializing vars early, we want to repr() things that are JSON-serializable to make their type more apparent. For example, it's useful to see the difference between a unicode-string and a bytestring when viewing a stacktrace. :param custom_repr: A custom repr function that runs before safe_repr on the object to be serialized. If it returns None or throws internally, we will fallback to safe_repr. 
@@ -113,7 +106,6 @@ def serialize(event: Union[Dict[str, Any], Event], **kwargs: Any) -> Dict[str, A path: List[Segment] = [] meta_stack: List[Dict[str, Any]] = [] - keep_request_bodies: bool = kwargs.pop("max_request_body_size", None) == "always" max_value_length: Optional[int] = kwargs.pop("max_value_length", None) is_vars = kwargs.pop("is_vars", False) custom_repr: Callable[..., Optional[str]] = kwargs.pop("custom_repr", None) @@ -179,11 +171,8 @@ def _is_request_body() -> Optional[bool]: def _serialize_node( obj: Any, is_databag: Optional[bool] = None, - is_request_body: Optional[bool] = None, should_repr_strings: Optional[bool] = None, segment: Optional[Segment] = None, - remaining_breadth: Optional[Union[int, float]] = None, - remaining_depth: Optional[Union[int, float]] = None, ) -> Any: if segment is not None: path.append(segment) @@ -196,10 +185,7 @@ def _serialize_node( return _serialize_node_impl( obj, is_databag=is_databag, - is_request_body=is_request_body, should_repr_strings=should_repr_strings, - remaining_depth=remaining_depth, - remaining_breadth=remaining_breadth, ) except BaseException: capture_internal_exception(sys.exc_info()) @@ -222,10 +208,7 @@ def _flatten_annotated(obj: Any) -> Any: def _serialize_node_impl( obj: Any, is_databag: Optional[bool], - is_request_body: Optional[bool], should_repr_strings: Optional[bool], - remaining_depth: Optional[Union[float, int]], - remaining_breadth: Optional[Union[float, int]], ) -> Any: if isinstance(obj, AnnotatedValue): should_repr_strings = False @@ -235,31 +218,10 @@ def _serialize_node_impl( if is_databag is None: is_databag = _is_databag() - if is_request_body is None: - is_request_body = _is_request_body() - - if is_databag: - if is_request_body and keep_request_bodies: - remaining_depth = float("inf") - remaining_breadth = float("inf") - else: - if remaining_depth is None: - remaining_depth = MAX_DATABAG_DEPTH - if remaining_breadth is None: - remaining_breadth = MAX_DATABAG_BREADTH - obj = 
_flatten_annotated(obj) - if remaining_depth is not None and remaining_depth <= 0: - _annotate(rem=[["!limit", "x"]]) - if is_databag: - return _flatten_annotated( - strip_string(_safe_repr_wrapper(obj), max_length=max_value_length) - ) - return None - if is_databag and global_repr_processors: - hints = {"memo": memo, "remaining_depth": remaining_depth} + hints = {"memo": memo} for processor in global_repr_processors: result = processor(obj, hints) if result is not NotImplemented: @@ -294,21 +256,12 @@ def _serialize_node_impl( i = 0 for k, v in obj.items(): - if remaining_breadth is not None and i >= remaining_breadth: - _annotate(len=len(obj)) - break - str_k = str(k) v = _serialize_node( v, segment=str_k, should_repr_strings=should_repr_strings, is_databag=is_databag, - is_request_body=is_request_body, - remaining_depth=( - remaining_depth - 1 if remaining_depth is not None else None - ), - remaining_breadth=remaining_breadth, ) rv_dict[str_k] = v i += 1 @@ -321,21 +274,12 @@ def _serialize_node_impl( rv_list = [] for i, v in enumerate(obj): - if remaining_breadth is not None and i >= remaining_breadth: - _annotate(len=len(obj)) - break - rv_list.append( _serialize_node( v, segment=i, should_repr_strings=should_repr_strings, is_databag=is_databag, - is_request_body=is_request_body, - remaining_depth=( - remaining_depth - 1 if remaining_depth is not None else None - ), - remaining_breadth=remaining_breadth, ) ) diff --git a/tests/integrations/bottle/test_bottle.py b/tests/integrations/bottle/test_bottle.py index 1965691d6c..5fc8fa0536 100644 --- a/tests/integrations/bottle/test_bottle.py +++ b/tests/integrations/bottle/test_bottle.py @@ -7,7 +7,6 @@ from sentry_sdk import capture_message from sentry_sdk.consts import DEFAULT_MAX_VALUE_LENGTH from sentry_sdk.integrations.bottle import BottleIntegration -from sentry_sdk.serializer import MAX_DATABAG_BREADTH from sentry_sdk.integrations.logging import LoggingIntegration from werkzeug.test import Client @@ -122,7 
+121,7 @@ def index(): def test_large_json_request(sentry_init, capture_events, app, get_client): - sentry_init(integrations=[BottleIntegration()], max_request_body_size="always") + sentry_init(integrations=[BottleIntegration()]) data = {"foo": {"bar": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}} @@ -180,7 +179,7 @@ def index(): def test_medium_formdata_request(sentry_init, capture_events, app, get_client): - sentry_init(integrations=[BottleIntegration()], max_request_body_size="always") + sentry_init(integrations=[BottleIntegration()]) data = {"foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)} @@ -242,7 +241,7 @@ def index(): def test_files_and_form(sentry_init, capture_events, app, get_client): - sentry_init(integrations=[BottleIntegration()], max_request_body_size="always") + sentry_init(integrations=[BottleIntegration()]) data = { "foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10), @@ -287,11 +286,9 @@ def index(): def test_json_not_truncated_if_max_request_body_size_is_always( sentry_init, capture_events, app, get_client ): - sentry_init(integrations=[BottleIntegration()], max_request_body_size="always") + sentry_init(integrations=[BottleIntegration()]) - data = { - "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) - } + data = {"key{}".format(i): "value{}".format(i) for i in range(1000)} @app.route("/", method="POST") def index(): diff --git a/tests/integrations/clickhouse_driver/test_clickhouse_driver.py b/tests/integrations/clickhouse_driver/test_clickhouse_driver.py index 47131f2d3d..20f221fc08 100644 --- a/tests/integrations/clickhouse_driver/test_clickhouse_driver.py +++ b/tests/integrations/clickhouse_driver/test_clickhouse_driver.py @@ -677,7 +677,7 @@ def test_clickhouse_dbapi_breadcrumbs_with_pii(sentry_init, capture_events) -> N "server.address": "localhost", "server.port": 9000, "db.params": {"minv": 150}, - "db.result": [[["370"]], [["'sum(x)'", "'Int64'"]]], + "db.result": [[[370]], [["sum(x)", "Int64"]]], }, "message": "SELECT 
sum(x) FROM test WHERE x > 150", "type": "default", diff --git a/tests/integrations/falcon/test_falcon.py b/tests/integrations/falcon/test_falcon.py index f972419092..631727a714 100644 --- a/tests/integrations/falcon/test_falcon.py +++ b/tests/integrations/falcon/test_falcon.py @@ -208,7 +208,7 @@ def on_get(self, req, resp): def test_falcon_large_json_request(sentry_init, capture_events): - sentry_init(integrations=[FalconIntegration()], max_request_body_size="always") + sentry_init(integrations=[FalconIntegration()]) data = {"foo": {"bar": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}} diff --git a/tests/integrations/flask/test_flask.py b/tests/integrations/flask/test_flask.py index 969fff2379..2127e81db2 100644 --- a/tests/integrations/flask/test_flask.py +++ b/tests/integrations/flask/test_flask.py @@ -29,7 +29,6 @@ ) from sentry_sdk.consts import DEFAULT_MAX_VALUE_LENGTH from sentry_sdk.integrations.logging import LoggingIntegration -from sentry_sdk.serializer import MAX_DATABAG_BREADTH login_manager = LoginManager() @@ -249,9 +248,7 @@ def login(): def test_flask_large_json_request(sentry_init, capture_events, app): - sentry_init( - integrations=[flask_sentry.FlaskIntegration()], max_request_body_size="always" - ) + sentry_init(integrations=[flask_sentry.FlaskIntegration()]) data = {"foo": {"bar": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}} @@ -344,9 +341,7 @@ def index(): def test_flask_medium_formdata_request(sentry_init, capture_events, app): - sentry_init( - integrations=[flask_sentry.FlaskIntegration()], max_request_body_size="always" - ) + sentry_init(integrations=[flask_sentry.FlaskIntegration()]) data = {"foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)} @@ -452,9 +447,7 @@ def index(): def test_flask_files_and_form(sentry_init, capture_events, app): - sentry_init( - integrations=[flask_sentry.FlaskIntegration()], max_request_body_size="always" - ) + sentry_init(integrations=[flask_sentry.FlaskIntegration()]) data = { "foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10), @@ 
-497,13 +490,9 @@ def index(): def test_json_not_truncated_if_max_request_body_size_is_always( sentry_init, capture_events, app ): - sentry_init( - integrations=[flask_sentry.FlaskIntegration()], max_request_body_size="always" - ) + sentry_init(integrations=[flask_sentry.FlaskIntegration()]) - data = { - "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) - } + data = {"key{}".format(i): "value{}".format(i) for i in range(10**5)} @app.route("/", methods=["POST"]) def index(): diff --git a/tests/integrations/pure_eval/test_pure_eval.py b/tests/integrations/pure_eval/test_pure_eval.py index 497a8768d0..e6e549fda5 100644 --- a/tests/integrations/pure_eval/test_pure_eval.py +++ b/tests/integrations/pure_eval/test_pure_eval.py @@ -2,7 +2,7 @@ import pytest -from sentry_sdk import capture_exception, serializer +from sentry_sdk import capture_exception from sentry_sdk.integrations.pure_eval import PureEvalIntegration @@ -62,6 +62,14 @@ def foo(): "i", "u", "y", + "t", + "r", + "e", + "w", + "q", + "(q, w, e, r, t, y, u, i, o, p, a, s)", + "str((q, w, e, r, t, y, u, i, o, p, a, s))", + "events", ] assert list(frame_vars.keys()) == expected_keys assert frame_vars["namespace.d"] == {"1": "2"} @@ -85,4 +93,4 @@ def foo(): "s", "events", } - assert len(frame_vars) == serializer.MAX_DATABAG_BREADTH + assert len(frame_vars) == 14 diff --git a/tests/integrations/pyramid/test_pyramid.py b/tests/integrations/pyramid/test_pyramid.py index cd200f7f7b..67f9f6269e 100644 --- a/tests/integrations/pyramid/test_pyramid.py +++ b/tests/integrations/pyramid/test_pyramid.py @@ -11,7 +11,6 @@ from sentry_sdk import capture_message, add_breadcrumb from sentry_sdk.consts import DEFAULT_MAX_VALUE_LENGTH from sentry_sdk.integrations.pyramid import PyramidIntegration -from sentry_sdk.serializer import MAX_DATABAG_BREADTH from tests.conftest import unpack_werkzeug_response @@ -157,7 +156,7 @@ def test_transaction_style( def test_large_json_request(sentry_init, 
capture_events, route, get_client): - sentry_init(integrations=[PyramidIntegration()], max_request_body_size="always") + sentry_init(integrations=[PyramidIntegration()]) data = {"foo": {"bar": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10)}} @@ -211,11 +210,9 @@ def index(request): def test_json_not_truncated_if_max_request_body_size_is_always( sentry_init, capture_events, route, get_client ): - sentry_init(integrations=[PyramidIntegration()], max_request_body_size="always") + sentry_init(integrations=[PyramidIntegration()]) - data = { - "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) - } + data = {"key{}".format(i): "value{}".format(i) for i in range(10**5)} @route("/") def index(request): @@ -234,7 +231,7 @@ def index(request): def test_files_and_form(sentry_init, capture_events, route, get_client): - sentry_init(integrations=[PyramidIntegration()], max_request_body_size="always") + sentry_init(integrations=[PyramidIntegration()]) data = { "foo": "a" * (DEFAULT_MAX_VALUE_LENGTH + 10), diff --git a/tests/integrations/starlette/test_starlette.py b/tests/integrations/starlette/test_starlette.py index bf89729b35..bdab710569 100644 --- a/tests/integrations/starlette/test_starlette.py +++ b/tests/integrations/starlette/test_starlette.py @@ -387,37 +387,6 @@ async def test_starletterequestextractor_body_consumed_twice( await extractor.request.body() -@pytest.mark.asyncio -async def test_starletterequestextractor_extract_request_info_too_big(sentry_init): - sentry_init( - send_default_pii=True, - integrations=[StarletteIntegration()], - ) - scope = SCOPE.copy() - scope["headers"] = [ - [b"content-type", b"multipart/form-data; boundary=fd721ef49ea403a6"], - [b"content-length", str(len(BODY_FORM)).encode()], - [b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"], - ] - starlette_request = starlette.requests.Request(scope) - - # Mocking async `_receive()` that works in Python 3.7+ - side_effect = [_mock_receive(msg) for msg in 
FORM_RECEIVE_MESSAGES] - starlette_request._receive = mock.Mock(side_effect=side_effect) - - extractor = StarletteRequestExtractor(starlette_request) - - request_info = await extractor.extract_request_info() - - assert request_info - assert request_info["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - # Because request is too big only the AnnotatedValue is extracted. - assert request_info["data"].metadata == {"rem": [["!config", "x"]]} - - @pytest.mark.asyncio async def test_starletterequestextractor_extract_request_info(sentry_init): sentry_init( diff --git a/tests/test_client.py b/tests/test_client.py index 5350450d95..b25faeee9c 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -24,7 +24,6 @@ from sentry_sdk.utils import capture_internal_exception from sentry_sdk.integrations.executing import ExecutingIntegration from sentry_sdk.transport import Transport -from sentry_sdk.serializer import MAX_DATABAG_BREADTH from sentry_sdk.consts import DEFAULT_MAX_BREADCRUMBS, DEFAULT_MAX_VALUE_LENGTH from typing import TYPE_CHECKING @@ -726,7 +725,7 @@ def inner(): assert len(json.dumps(event)) < DEFAULT_MAX_VALUE_LENGTH * 10 -def test_databag_breadth_stripping(sentry_init, capture_events, benchmark): +def test_databag_breadth_no_stripping(sentry_init, capture_events, benchmark): sentry_init() events = capture_events() @@ -743,9 +742,8 @@ def inner(): assert ( len(event["exception"]["values"][0]["stacktrace"]["frames"][0]["vars"]["a"]) - == MAX_DATABAG_BREADTH + == 1000000 ) - assert len(json.dumps(event)) < 10000 def test_chained_exceptions(sentry_init, capture_events): diff --git a/tests/test_scrubber.py b/tests/test_scrubber.py index cc99411778..0e49f099ff 100644 --- a/tests/test_scrubber.py +++ b/tests/test_scrubber.py @@ -234,7 +234,7 @@ def test_recursive_event_scrubber(sentry_init, capture_events): capture_event({"extra": complex_structure}) (event,) = events - assert 
event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "'[Filtered]'" + assert event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "[Filtered]" def test_recursive_scrubber_does_not_override_original(sentry_init, capture_events): diff --git a/tests/test_serializer.py b/tests/test_serializer.py index 2f44ba8a08..d2c018480a 100644 --- a/tests/test_serializer.py +++ b/tests/test_serializer.py @@ -3,7 +3,7 @@ import pytest from sentry_sdk.consts import DEFAULT_MAX_VALUE_LENGTH -from sentry_sdk.serializer import MAX_DATABAG_BREADTH, MAX_DATABAG_DEPTH, serialize +from sentry_sdk.serializer import serialize try: from hypothesis import given @@ -140,32 +140,16 @@ def custom_repr(value): assert "Foo object" in result["foo"] -def test_trim_databag_breadth(body_normalizer): - data = { - "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) - } +def test_dont_trim_databag_breadth(body_normalizer): + data = {"key{}".format(i): "value{}".format(i) for i in range(10**5)} result = body_normalizer(data) - assert len(result) == MAX_DATABAG_BREADTH + assert len(result) == 10**5 for key, value in result.items(): assert data.get(key) == value -def test_no_trimming_if_max_request_body_size_is_always(body_normalizer): - data = { - "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) - } - curr = data - for _ in range(MAX_DATABAG_DEPTH + 5): - curr["nested"] = {} - curr = curr["nested"] - - result = body_normalizer(data, max_request_body_size="always") - - assert result == data - - def test_max_value_length_default(body_normalizer): data = {"key": "a" * (DEFAULT_MAX_VALUE_LENGTH * 10)}