fix(sampling): ensure all tags in a sampling rule are matched (#14097)

mabdinur · web-flow · commit 162d587fb904 · 2025-07-24T22:20:49.000Z
Trace sampling rules now require all specified tags to be present for a match, instead of ignoring missing tags. Additionally, all glob patterns (e.g., *, ?, [ ]) now work with numeric tags, including decimals. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
diff --git a/ddtrace/_trace/sampling_rule.py b/ddtrace/_trace/sampling_rule.py
@@ -111,46 +111,38 @@ def matches(self, span: Span) -> bool:
         :returns: Whether this span matches or not
         :rtype: :obj:`bool`
         """
-        tags_match = self.tags_match(span)
-        return tags_match and self._matches((span.service, span.name, span.resource))
+        return self.tags_match(span) and self._matches((span.service, span.name, span.resource))
 
     def tags_match(self, span: Span) -> bool:
-        tag_match = True
-        if self._tag_value_matchers:
-            tag_match = self.check_tags(span.get_tags(), span.get_metrics())
-        return tag_match
+        if not self._tag_value_matchers:
+            return True
 
-    def check_tags(self, meta, metrics):
-        if meta is None and metrics is None:
+        meta = span._meta or {}
+        metrics = span._metrics or {}
+        if not meta and not metrics:
             return False
 
-        tag_match = False
-        for tag_key in self._tag_value_matchers.keys():
-            value = meta.get(tag_key)
-            # it's because we're not checking metrics first before continuing
+        for tag_key, pattern in self._tag_value_matchers.items():
+            value = meta.get(tag_key, metrics.get(tag_key))
             if value is None:
-                value = metrics.get(tag_key)
-                if value is None:
-                    continue
-                # Floats: Matching floating point values with a non-zero decimal part is not supported.
-                # For floating point values with a non-zero decimal part, any all * pattern always returns true.
-                # Other patterns always return false.
-                if isinstance(value, float):
-                    if not value.is_integer():
-                        if all(c == "*" for c in self._tag_value_matchers[tag_key].pattern):
-                            tag_match = True
-                            continue
-                        else:
-                            return False
-                    else:
-                        value = int(value)
-
-            tag_match = self._tag_value_matchers[tag_key].match(str(value))
-            # if we don't match with all specified tags for a rule, it's not a match
-            if tag_match is False:
+                # If the tag is not present, we failed the match
+                # (Metrics and meta do not support the value None)
+                return False
+
+            if isinstance(value, float):
+                # Floats: Convert floats that represent integers to int for matching. This is because
+                # SamplingRules only support integers for matching or glob patterns.
+                if value.is_integer():
+                    value = int(value)
+                elif set(pattern.pattern) - {"?", "*"}:
+                    # Only match floats to patterns that only contain wildcards (ex: * or ?*)
+                    # This is because we do not want to match floats to patterns like `23.*`.
+                    return False
+
+            if not pattern.match(str(value)):
                 return False
 
-        return tag_match
+        return True
 
     def sample(self, span):
         """
diff --git a/releasenotes/notes/fix-sampling-on-tags-17b90f67a74b91e9.yaml b/releasenotes/notes/fix-sampling-on-tags-17b90f67a74b91e9.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    sampling: Trace sampling rules now require all specified tags to be present for a match, instead of ignoring missing tags.
+    Additionally, all glob patterns that do not contain digits (e.g., *, ?, [ ]) now work with numeric tags, including decimals.
diff --git a/tests/integration/test_sampling.py b/tests/integration/test_sampling.py
@@ -243,10 +243,33 @@ def test_extended_sampling_tags_and_name_glob():
 
 
 @pytest.mark.snapshot()
-@pytest.mark.subprocess(env={"DD_TRACE_SAMPLING_RULES": json.dumps([{"sample_rate": 0, "tags": {"tag": "2*"}}])})
+@pytest.mark.subprocess(
+    env={
+        "DD_TRACE_SAMPLING_RULES": json.dumps(
+            [{"sample_rate": 0, "service": "mycoolservice", "tags": {"tag1": "monkey", "tag2": "banana"}}]
+        )
+    }
+)
+def test_extended_sampling_tags_partial_match():
+    """
+    For a span to match a sampling rule it must contain all the tags listed in the rule.
+    Partial matches are not allowed.
+    """
+    from ddtrace.trace import tracer
+
+    with tracer.trace(name="should_send", service="mycoolservice") as span:
+        span.set_tag("tag1", "monkey")
+
+    with tracer.trace(name="should_not_send", service="mycoolservice") as span:
+        span.set_tag("tag1", "monkey")
+        span.set_tag("tag2", "banana")
+
+
+@pytest.mark.snapshot()
+@pytest.mark.subprocess(env={"DD_TRACE_SAMPLING_RULES": json.dumps([{"sample_rate": 0, "tags": {"tag1": "2*"}}])})
 def test_extended_sampling_float_special_case_do_not_match():
-    """A float with a non-zero decimal and a tag with a non-* pattern
-    # should not match the rule, and should therefore be kept
+    """A float with a non-zero decimal and a tag with a pattern
+    that contains a digit should not match the rule, and should therefore be kept.
     """
     from ddtrace.trace import tracer
 
@@ -255,16 +278,32 @@ def test_extended_sampling_float_special_case_do_not_match():
 
 
 @pytest.mark.snapshot()
-@pytest.mark.subprocess(env={"DD_TRACE_SAMPLING_RULES": json.dumps([{"sample_rate": 0, "tags": {"tag": "*"}}])})
+@pytest.mark.subprocess(
+    env={
+        "DD_TRACE_SAMPLING_RULES": json.dumps(
+            [
+                {"sample_rate": 0, "tags": {"tag": "*"}},
+                {"sample_rate": 0, "tags": {"tag2": "?*"}},
+                {"sample_rate": 0, "tags": {"tag3": "**"}},
+            ]
+        )
+    }
+)
 def test_extended_sampling_float_special_case_match_star():
-    """A float with a non-zero decimal and a tag with a * pattern
-    # should match the rule, and should therefore should be dropped
+    """A float with a non-zero decimal and a tag with a glob pattern that does
+    not contain a digit should match the rule and should therefore be dropped
     """
     from ddtrace.trace import tracer
 
-    with tracer.trace(name="should_send") as span:
+    with tracer.trace(name="should_not_send") as span:
         span.set_tag("tag", 20.1)
 
+    with tracer.trace(name="should_not_send2") as span:
+        span.set_tag("tag2", 22.2)
+
+    with tracer.trace(name="should_not_send3") as span:
+        span.set_tag("tag3", 3333333.33333)
+
 
 @pytest.mark.subprocess()
 def test_rate_limiter_on_spans(tracer):
diff --git a/tests/snapshots/tests.integration.test_sampling.test_extended_sampling_float_special_case_match_star.json b/tests/snapshots/tests.integration.test_sampling.test_extended_sampling_float_special_case_match_star.json
@@ -1,27 +1,81 @@
 [[
   {
-    "name": "should_send",
+    "name": "should_not_send",
     "service": "ddtrace_subprocess_dir",
-    "resource": "should_send",
+    "resource": "should_not_send",
     "trace_id": 0,
     "span_id": 1,
     "parent_id": 0,
     "type": "",
     "error": 0,
     "meta": {
       "_dd.p.dm": "-3",
-      "_dd.p.tid": "6600aa0700000000",
+      "_dd.p.tid": "68824df100000000",
       "language": "python",
-      "runtime-id": "8eef084f89104256b5f82b95c235139c"
+      "runtime-id": "79453d6c5643438bbdf8e2c1d9548877"
     },
     "metrics": {
       "_dd.rule_psr": 0.0,
       "_dd.top_level": 1,
       "_dd.tracer_kr": 1.0,
       "_sampling_priority_v1": -1,
-      "process_id": 53744,
+      "process_id": 65903,
       "tag": 20.1
     },
-    "duration": 25000,
-    "start": 1711319559844282297
+    "duration": 22000,
+    "start": 1753370097140066000
+  }],
+[
+  {
+    "name": "should_not_send2",
+    "service": "ddtrace_subprocess_dir",
+    "resource": "should_not_send2",
+    "trace_id": 1,
+    "span_id": 1,
+    "parent_id": 0,
+    "type": "",
+    "error": 0,
+    "meta": {
+      "_dd.p.dm": "-3",
+      "_dd.p.tid": "68824df100000000",
+      "language": "python",
+      "runtime-id": "79453d6c5643438bbdf8e2c1d9548877"
+    },
+    "metrics": {
+      "_dd.rule_psr": 0.0,
+      "_dd.top_level": 1,
+      "_dd.tracer_kr": 1.0,
+      "_sampling_priority_v1": -1,
+      "process_id": 65903,
+      "tag2": 22.2
+    },
+    "duration": 14000,
+    "start": 1753370097140825000
+  }],
+[
+  {
+    "name": "should_not_send3",
+    "service": "ddtrace_subprocess_dir",
+    "resource": "should_not_send3",
+    "trace_id": 2,
+    "span_id": 1,
+    "parent_id": 0,
+    "type": "",
+    "error": 0,
+    "meta": {
+      "_dd.p.dm": "-3",
+      "_dd.p.tid": "68824df100000000",
+      "language": "python",
+      "runtime-id": "79453d6c5643438bbdf8e2c1d9548877"
+    },
+    "metrics": {
+      "_dd.rule_psr": 0.0,
+      "_dd.top_level": 1,
+      "_dd.tracer_kr": 1.0,
+      "_sampling_priority_v1": -1,
+      "process_id": 65903,
+      "tag3": 3333333.33333
+    },
+    "duration": 9000,
+    "start": 1753370097140891000
   }]]
diff --git a/tests/snapshots/tests.integration.test_sampling.test_extended_sampling_tags_partial_match.json b/tests/snapshots/tests.integration.test_sampling.test_extended_sampling_tags_partial_match.json
@@ -0,0 +1,56 @@
+[[
+  {
+    "name": "should_send",
+    "service": "mycoolservice",
+    "resource": "should_send",
+    "trace_id": 0,
+    "span_id": 1,
+    "parent_id": 0,
+    "type": "",
+    "error": 0,
+    "meta": {
+      "_dd.base_service": "ddtrace_subprocess_dir",
+      "_dd.p.dm": "-0",
+      "_dd.p.tid": "6881b20100000000",
+      "language": "python",
+      "runtime-id": "e5827348f1274e43a58dc88d9dc65596",
+      "tag1": "monkey"
+    },
+    "metrics": {
+      "_dd.top_level": 1,
+      "_dd.tracer_kr": 1.0,
+      "_sampling_priority_v1": 1,
+      "process_id": 51177
+    },
+    "duration": 21000,
+    "start": 1753330177405084000
+  }],
+[
+  {
+    "name": "should_not_send",
+    "service": "mycoolservice",
+    "resource": "should_not_send",
+    "trace_id": 1,
+    "span_id": 1,
+    "parent_id": 0,
+    "type": "",
+    "error": 0,
+    "meta": {
+      "_dd.base_service": "ddtrace_subprocess_dir",
+      "_dd.p.dm": "-3",
+      "_dd.p.tid": "6881b20100000000",
+      "language": "python",
+      "runtime-id": "e5827348f1274e43a58dc88d9dc65596",
+      "tag1": "monkey",
+      "tag2": "banana"
+    },
+    "metrics": {
+      "_dd.rule_psr": 0.0,
+      "_dd.top_level": 1,
+      "_dd.tracer_kr": 1.0,
+      "_sampling_priority_v1": -1,
+      "process_id": 51177
+    },
+    "duration": 15000,
+    "start": 1753330177405259000
+  }]]

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +fixes:
 +  - |
 +    sampling: Trace sampling rules now require all specified tags to be present for a match, instead of ignoring missing tags.
 +    Additionally, all glob patterns that do not contain digits (e.g., *, ?, [ ]) now work with numeric tags, including decimals.