Skip to content

Commit 945780b

Browse files
authored
perf: reduce calls to Path.resolve (#14460)
Calls to `Path.resolve` turn out to be quite expensive. We try to avoid them by checking whether a Path object actually needs to be resolved. In many cases, we just need to check whether the path is absolute or a link and then only resolve when necessary. ## Performance Analysis We take Symbol DB as a part of the codebase that exercises the code of interest. The before flame graph shows the CPU cost of calls to `resolve`. The after flame graph shows how they can be avoided in many cases. ### Before <img width="1407" height="460" alt="Screenshot 2025-09-01 at 15 40 16" src="https://github.com/user-attachments/assets/2b4da82c-140d-4ec7-9954-5734ea65c987" /> ### After <img width="1407" height="460" alt="Screenshot 2025-09-01 at 15 39 25" src="https://github.com/user-attachments/assets/e089e59a-260c-44af-9c51-a953ec62465b" /> ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent ebd1e38 commit 945780b

File tree

8 files changed

+65
-23
lines changed

8 files changed

+65
-23
lines changed

ddtrace/debugging/_function/discovery.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from collections import defaultdict
22
from collections import deque
3-
from pathlib import Path
43
from types import CodeType
54
from types import FunctionType
65
from types import ModuleType
@@ -24,6 +23,7 @@
2423
from ddtrace.internal.utils.inspection import collect_code_objects
2524
from ddtrace.internal.utils.inspection import functions_for_code
2625
from ddtrace.internal.utils.inspection import linenos
26+
from ddtrace.internal.utils.inspection import resolved_code_origin
2727
from ddtrace.internal.utils.inspection import undecorated
2828

2929

@@ -209,7 +209,7 @@ def _collect_functions(module: ModuleType) -> Dict[str, _FunctionCodePair]:
209209

210210
for name in (k, local_name) if isinstance(k, str) and k != local_name else (local_name,):
211211
fullname = ".".join((c.__fullname__, name)) if c.__fullname__ else name
212-
if fullname not in functions or Path(code.co_filename).resolve() == path:
212+
if fullname not in functions or resolved_code_origin(code) == path:
213213
# Give precedence to code objects from the module and
214214
# try to retrieve any potentially decorated function so
215215
# that we don't end up returning the decorator function
@@ -285,7 +285,7 @@ def __init__(self, module: ModuleType) -> None:
285285

286286
if (
287287
function not in seen_functions
288-
and Path(cast(FunctionType, function).__code__.co_filename).resolve() == module_path
288+
and resolved_code_origin(cast(FunctionType, function).__code__) == module_path
289289
):
290290
# We only map line numbers for functions that actually belong to
291291
# the module.
@@ -342,7 +342,7 @@ def _resolve_pair(self, pair: _FunctionCodePair, fullname: str) -> FullyNamedFun
342342

343343
code = pair.code
344344
assert code is not None # nosec
345-
f = undecorated(cast(FunctionType, target), cast(str, part), Path(code.co_filename).resolve())
345+
f = undecorated(cast(FunctionType, target), cast(str, part), resolved_code_origin(code))
346346
if not (isinstance(f, FunctionType) and f.__code__ is code):
347347
raise e
348348

ddtrace/debugging/_origin/span.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def from_frame(cls, frame: FrameType) -> "ExitSpanProbe":
102102
ExitSpanProbe,
103103
cls.build(
104104
name=code.co_qualname if sys.version_info >= (3, 11) else code.co_name, # type: ignore[attr-defined]
105-
filename=str(Path(code.co_filename).resolve()),
105+
filename=str(Path(code.co_filename)),
106106
line=frame.f_lineno,
107107
),
108108
)

ddtrace/internal/coverage/code.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ddtrace.internal.packages import purelib_path
2121
from ddtrace.internal.packages import stdlib_path
2222
from ddtrace.internal.test_visibility.coverage_lines import CoverageLines
23+
from ddtrace.internal.utils.inspection import resolved_code_origin
2324

2425

2526
log = get_logger(__name__)
@@ -317,7 +318,7 @@ def transform(self, code: CodeType, _module: ModuleType) -> CodeType:
317318
if _module is None:
318319
return code
319320

320-
code_path = Path(code.co_filename).resolve()
321+
code_path = resolved_code_origin(code)
321322

322323
if not any(code_path.is_relative_to(include_path) for include_path in self._include_paths):
323324
# Not a code object we want to instrument

ddtrace/internal/module.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -111,21 +111,34 @@ def unregister_post_run_module_hook(hook: ModuleHookType) -> None:
111111
def origin(module: ModuleType) -> t.Optional[Path]:
112112
"""Get the origin source file of the module."""
113113
try:
114-
# DEV: Use object.__getattribute__ to avoid potential side-effects.
115-
orig = Path(object.__getattribute__(module, "__file__")).resolve()
116-
except (AttributeError, TypeError):
117-
# Module is probably only partially initialised, so we look at its
118-
# spec instead
114+
# Do not access __dd_origin__ directly to avoid force-loading lazy
115+
# modules.
116+
return object.__getattribute__(module, "__dd_origin__")
117+
except AttributeError:
119118
try:
120119
# DEV: Use object.__getattribute__ to avoid potential side-effects.
121-
orig = Path(object.__getattribute__(module, "__spec__").origin).resolve()
122-
except (AttributeError, ValueError, TypeError):
123-
orig = None
120+
orig = Path(object.__getattribute__(module, "__file__")).resolve()
121+
except (AttributeError, TypeError):
122+
# Module is probably only partially initialised, so we look at its
123+
# spec instead
124+
try:
125+
# DEV: Use object.__getattribute__ to avoid potential side-effects.
126+
orig = Path(object.__getattribute__(module, "__spec__").origin).resolve()
127+
except (AttributeError, ValueError, TypeError):
128+
orig = None
124129

125-
if orig is not None and orig.is_file():
126-
return orig.with_suffix(".py") if orig.suffix == ".pyc" else orig
130+
if orig is not None and orig.suffix == "pyc":
131+
orig = orig.with_suffix(".py")
127132

128-
return None
133+
if orig is not None:
134+
# If we failed to find a valid origin we don't cache the value and
135+
# try again the next time.
136+
try:
137+
module.__dd_origin__ = orig # type: ignore[attr-defined]
138+
except AttributeError:
139+
pass
140+
141+
return orig
129142

130143

131144
def _resolve(path: Path) -> t.Optional[Path]:

ddtrace/internal/packages.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,21 @@ def _effective_root(rel_path: Path, parent: Path) -> str:
9999
return base if root.is_dir() and (root / "__init__.py").exists() else "/".join(rel_path.parts[:2])
100100

101101

102+
# DEV: Since we can't lock on sys.path, these operations can be racy.
103+
_SYS_PATH_HASH: t.Optional[int] = None
104+
_RESOLVED_SYS_PATH: t.List[Path] = []
105+
106+
107+
def resolve_sys_path() -> t.List[Path]:
108+
global _SYS_PATH_HASH, _RESOLVED_SYS_PATH
109+
110+
if (h := hash(tuple(sys.path))) != _SYS_PATH_HASH:
111+
_SYS_PATH_HASH = h
112+
_RESOLVED_SYS_PATH = [Path(_).resolve() for _ in sys.path]
113+
114+
return _RESOLVED_SYS_PATH
115+
116+
102117
def _root_module(path: Path) -> str:
103118
# Try the most likely prefixes first
104119
for parent_path in (purelib_path, platlib_path):
@@ -112,7 +127,7 @@ def _root_module(path: Path) -> str:
112127
# Try to resolve the root module using sys.path. We keep the shortest
113128
# relative path as the one more likely to give us the root module.
114129
min_relative_path = max_parent_path = None
115-
for parent_path in (Path(_).resolve() for _ in sys.path):
130+
for parent_path in resolve_sys_path():
116131
try:
117132
relative = path.relative_to(parent_path)
118133
if min_relative_path is None or len(relative.parents) < len(min_relative_path.parents):
@@ -240,7 +255,9 @@ def module_to_package(module: ModuleType) -> t.Optional[Distribution]:
240255

241256
@cached(maxsize=256)
242257
def is_stdlib(path: Path) -> bool:
243-
rpath = path.resolve()
258+
rpath = path
259+
if not rpath.is_absolute() or rpath.is_symlink():
260+
rpath = rpath.resolve()
244261

245262
return (rpath.is_relative_to(stdlib_path) or rpath.is_relative_to(platstdlib_path)) and not (
246263
rpath.is_relative_to(purelib_path) or rpath.is_relative_to(platlib_path)

ddtrace/internal/symbol_db/symbols.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from ddtrace.internal.utils.http import connector
3939
from ddtrace.internal.utils.http import multipart
4040
from ddtrace.internal.utils.inspection import linenos
41+
from ddtrace.internal.utils.inspection import resolved_code_origin
4142
from ddtrace.internal.utils.inspection import undecorated
4243
from ddtrace.settings._agent import config as agent_config
4344
from ddtrace.settings.symbol_db import config as symdb_config
@@ -324,7 +325,7 @@ def _(cls, code: CodeType, data: ScopeData, recursive: bool = True):
324325
return None
325326
data.seen.add(code_id)
326327

327-
if Path(code.co_filename).resolve() != data.origin:
328+
if (code_origin := resolved_code_origin(code)) != data.origin:
328329
# Comes from another module.
329330
return None
330331

@@ -338,7 +339,7 @@ def _(cls, code: CodeType, data: ScopeData, recursive: bool = True):
338339
return Scope(
339340
scope_type=ScopeType.CLOSURE, # DEV: Not in the sense of a Python closure.
340341
name=code.co_name,
341-
source_file=str(Path(code.co_filename).resolve()),
342+
source_file=str(code_origin),
342343
start_line=start_line,
343344
end_line=end_line,
344345
symbols=Symbol.from_code(code),

ddtrace/internal/utils/inspection.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from typing import cast
1414

1515
from ddtrace.internal.safety import _isinstance
16+
from ddtrace.internal.utils.cache import cached
1617

1718

1819
@singledispatch
@@ -31,14 +32,23 @@ def _(f: FunctionType) -> Set[int]:
3132
return linenos(f.__code__)
3233

3334

35+
@cached(maxsize=4 << 10)
36+
def _filename_to_resolved_path(filename: str) -> Path:
37+
return Path(filename).resolve()
38+
39+
40+
def resolved_code_origin(code: CodeType) -> Path:
41+
return _filename_to_resolved_path(code.co_filename)
42+
43+
3444
def undecorated(f: FunctionType, name: str, path: Path) -> FunctionType:
3545
# Find the original function object from a decorated function. We use the
3646
# expected function name to guide the search and pick the correct function.
3747
# The recursion is needed in case of multiple decorators. We make it BFS
3848
# to find the function as soon as possible.
3949

4050
def match(g):
41-
return g.__code__.co_name == name and Path(g.__code__.co_filename).resolve() == path
51+
return g.__code__.co_name == name and resolved_code_origin(g.__code__) == path
4252

4353
if _isinstance(f, FunctionType) and match(f):
4454
return f

tests/appsec/iast/_ast/test_ast_patching.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ def test_should_not_iast_patch_if_stdlib(module_name):
412412

413413

414414
def test_module_path_none(caplog):
415-
with caplog.at_level(logging.DEBUG), mock.patch("ddtrace.internal.module.Path.resolve", side_effect=AttributeError):
415+
with caplog.at_level(logging.DEBUG), mock.patch("ddtrace.appsec._iast._ast.ast_patching.origin", return_value=None):
416416
assert ("", None) == astpatch_module(__import__("tests.appsec.iast.fixtures.ast.str.class_str", fromlist=["*"]))
417417
assert (
418418
"iast::instrumentation::ast_patching::compiling::"

0 commit comments

Comments
 (0)