From 1d8bab290c85362213d47aa99d925421908b8558 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 7 Nov 2025 17:26:15 -0500 Subject: [PATCH 01/78] Add initial attempt at adding process related tags on trace payloads. This is still missing memoization and additional tests. --- lib/datadog/core/environment/ext.rb | 7 ++ lib/datadog/core/environment/process.rb | 47 +++++++++++ lib/datadog/tracing/configuration/ext.rb | 1 + lib/datadog/tracing/configuration/settings.rb | 10 +++ .../tracing/transport/trace_formatter.rb | 9 ++ spec/datadog/core/environment/process_spec.rb | 83 +++++++++++++++++++ supported-configurations.json | 3 + 7 files changed, 160 insertions(+) create mode 100644 lib/datadog/core/environment/process.rb create mode 100644 spec/datadog/core/environment/process_spec.rb diff --git a/lib/datadog/core/environment/ext.rb b/lib/datadog/core/environment/ext.rb index 0b01d9fc4fe..6bf062dcba5 100644 --- a/lib/datadog/core/environment/ext.rb +++ b/lib/datadog/core/environment/ext.rb @@ -33,8 +33,15 @@ module Ext LANG_INTERPRETER = "#{RUBY_ENGINE}-#{RUBY_PLATFORM}" LANG_PLATFORM = RUBY_PLATFORM LANG_VERSION = RUBY_VERSION + PROCESS_TYPE = 'script' RUBY_ENGINE = ::RUBY_ENGINE # e.g. 
'ruby', 'jruby', 'truffleruby' TAG_ENV = 'env' + TAG_ENTRYPOINT_BASEDIR = "entrypoint.basedir" + TAG_ENTRYPOINT_NAME = "entrypoint.name" + TAG_ENTRYPOINT_WORKDIR = "entrypoint.workdir" + TAG_ENTRYPOINT_TYPE = "entrypoint.type" + TAG_PROCESS_TAGS = "_dd.tags.process" + TAG_SERVER_TYPE = "server.type" TAG_SERVICE = 'service' TAG_VERSION = 'version' diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb new file mode 100644 index 00000000000..f3c06be9a78 --- /dev/null +++ b/lib/datadog/core/environment/process.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true +require_relative 'ext' + +module Datadog + module Core + module Environment + # Retrieves process level information + # TODO: Add memoization + module Process + module_function + + def entrypoint_workdir + File.basename(Dir.pwd) + end + + def entrypoint_type + Core::Environment::Ext::PROCESS_TYPE + end + + def entrypoint_name + File.basename($0.to_s) + end + + def entrypoint_basedir + current_basedir = File.expand_path(File.dirname($0.to_s)) + normalized_basedir = current_basedir.tr(File::SEPARATOR, '/') + normalized_basedir.sub(%r{^/}, '') + end + + def server_type + 'placeholder' + end + + def formatted_process_tags_k1_v1 + return @formatted_process_tags_k1_v1 if defined?(@formatted_process_tags_k1_v1) + tags = [] + tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{entrypoint_workdir}" if entrypoint_workdir + tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entrypoint_name}" if entrypoint_name + tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{entrypoint_basedir}" if entrypoint_basedir + tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{entrypoint_type}" if entrypoint_type + tags << "#{Core::Environment::Ext::TAG_SERVER_TYPE}:#{server_type}" if server_type + @formatted_process_tags_k1_v1 = tags.join(',').freeze + end + end + end + end +end diff --git a/lib/datadog/tracing/configuration/ext.rb 
b/lib/datadog/tracing/configuration/ext.rb index b86fc662418..1ab17e6a213 100644 --- a/lib/datadog/tracing/configuration/ext.rb +++ b/lib/datadog/tracing/configuration/ext.rb @@ -15,6 +15,7 @@ module Ext ENV_NATIVE_SPAN_EVENTS = 'DD_TRACE_NATIVE_SPAN_EVENTS' ENV_RESOURCE_RENAMING_ENABLED = 'DD_TRACE_RESOURCE_RENAMING_ENABLED' ENV_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT = 'DD_TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT' + ENV_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED = 'DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED' # @public_api module SpanAttributeSchema diff --git a/lib/datadog/tracing/configuration/settings.rb b/lib/datadog/tracing/configuration/settings.rb index 82f6eb4a32e..f730d1ed438 100644 --- a/lib/datadog/tracing/configuration/settings.rb +++ b/lib/datadog/tracing/configuration/settings.rb @@ -203,6 +203,16 @@ def self.extended(base) o.type :bool end + # Enable experimental process tags propagation. + # + # @default `DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED` environment variable, otherwise `false` + # @return [Boolean] + option :experimental_propagate_process_tags_enabled do |o| + o.env Tracing::Configuration::Ext::ENV_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED + o.default false + o.type :bool + end + # Enable 128 bit trace id injected for logging. # # @default `DD_TRACE_128_BIT_TRACEID_LOGGING_ENABLED` environment variable, otherwise `false` diff --git a/lib/datadog/tracing/transport/trace_formatter.rb b/lib/datadog/tracing/transport/trace_formatter.rb index 48e733de951..9422dc33765 100644 --- a/lib/datadog/tracing/transport/trace_formatter.rb +++ b/lib/datadog/tracing/transport/trace_formatter.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative '../../core/environment/identity' +require_relative '../../core/environment/process' require_relative '../../core/environment/socket' require_relative '../../core/environment/git' require_relative '../../core/git/ext' @@ -60,6 +61,7 @@ def format! tag_sampling_priority! 
tag_profiling_enabled! tag_apm_tracing_disabled! + tag_process_tags! if first_span tag_git_repository_url! @@ -215,6 +217,13 @@ def tag_git_commit_sha! first_span.set_tag(Core::Git::Ext::TAG_COMMIT_SHA, git_commit_sha) end + def tag_process_tags! + return unless trace.experimental_propagate_process_tags_enabled + process_tags = Core::Environment::Process.formatted_process_tags_k1_v1 + return if process_tags.empty? + root_span.set_tag(Core::Environment::Ext::TAG_PROCESS_TAGS, process_tags) + end + private def partial? diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb new file mode 100644 index 00000000000..ca345709b5e --- /dev/null +++ b/spec/datadog/core/environment/process_spec.rb @@ -0,0 +1,83 @@ +require 'spec_helper' +require 'datadog/core/environment/process' +require 'open3' + +RSpec.describe Datadog::Core::Environment::Process do + describe '::entrypoint_workdir' do + subject(:entrypoint_workdir) { described_class.entrypoint_workdir } + + it { is_expected.to be_a_kind_of(String) } + end + + describe '::entrypoint_type' do + subject(:entrypoint_type) { described_class.entrypoint_type } + + it { is_expected.to be_a_kind_of(String) } + it { is_expected.to eq(Datadog::Core::Environment::Ext::PROCESS_TYPE) } + end + + describe '::entrypoint_name' do + subject(:entrypoint_name) { described_class.entrypoint_name } + + it { is_expected.to be_a_kind_of(String) } + end + + describe '::entrypoint_basedir' do + subject(:entrypoint_basedir) { described_class.entrypoint_basedir } + + it { is_expected.to be_a_kind_of(String) } + end + + describe '::server_type' do + subject(:server_type) { described_class.server_type } + + it { is_expected.to be_a_kind_of(String) } + end + + describe 'Scenario: Real applications' do + context 'when running a real Rails application' do + it 'detects Rails process information correctly' do + Dir.mktmpdir do |tmp_dir| + Dir.chdir(tmp_dir) do + Bundler.with_unbundled_env do + skip('rails 
gem could not be installed') unless system('gem install rails -v 7.0.0') + unless system('rails new test_app --minimal --skip-test --skip-keeps --skip-git --skip-docker') + skip('rails new command failed') + end + end + end + File.open("#{tmp_dir}/test_app/Gemfile", 'a') do |file| + file.puts "gem 'datadog', path: '#{Dir.pwd}', require: false" + end + File.write("#{tmp_dir}/test_app/config/initializers/process_initializer.rb", <<-RUBY) + Rails.application.config.after_initialize do + require 'datadog/core/environment/process' + STDERR.puts "entrypoint_workdir:\#{Datadog::Core::Environment::Process.entrypoint_workdir}" + STDERR.puts "entrypoint_type:\#{Datadog::Core::Environment::Process.entrypoint_type}" + STDERR.puts "entrypoint_name:\#{Datadog::Core::Environment::Process.entrypoint_name}" + STDERR.puts "entrypoint_basedir:\#{Datadog::Core::Environment::Process.entrypoint_basedir}" + STDERR.puts "server_type:\#{Datadog::Core::Environment::Process.server_type}" + STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.formatted_process_tags_k1_v1}" + STDERR.flush + Thread.new { sleep 1; Process.kill('TERM', Process.pid)}#{' '} + end + RUBY + Bundler.with_unbundled_env do + Dir.chdir("#{tmp_dir}/test_app") do + _, _, _ = Open3.capture3('bundle install') + _, err, _ = Open3.capture3('bundle exec rails s') + expect(err).to include('entrypoint_workdir:test_app') + expect(err).to include('entrypoint_type:script') + expect(err).to include('entrypoint_name:rails') + basedir_test = tmp_dir.sub(%r{^/}, '') + expect(err).to include("entrypoint_basedir:#{basedir_test}/test_app/bin") + expect(err).to include('server_type:placeholder') + expected_tags = "entrypoint.workdir:test_app,entrypoint.name:rails,entrypoint.basedir:#{basedir_test}/test_app/bin,entrypoint.type:script,server.type:placeholder" + expect(err).to include("_dd.tags.process:#{expected_tags}") + end + end + end + end + end + end +end diff --git a/supported-configurations.json 
b/supported-configurations.json index a52d162b18b..bf673eb74db 100644 --- a/supported-configurations.json +++ b/supported-configurations.json @@ -109,6 +109,9 @@ "DD_ERROR_TRACKING_HANDLED_ERRORS_INCLUDE": { "version": ["A"] }, + "DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED": { + "version": ["A"] + }, "DD_GIT_COMMIT_SHA": { "version": ["A"] }, From 58592a3cfe8468dbaff20eada17d524e1376d9a0 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Mon, 10 Nov 2025 15:36:23 -0500 Subject: [PATCH 02/78] Add test for multiple calls to the formatter tags --- lib/datadog/core/environment/process.rb | 1 - spec/datadog/core/environment/process_spec.rb | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index f3c06be9a78..8d9f3ce5605 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -5,7 +5,6 @@ module Datadog module Core module Environment # Retrieves process level information - # TODO: Add memoization module Process module_function diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index ca345709b5e..8649083a294 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -34,6 +34,19 @@ it { is_expected.to be_a_kind_of(String) } end + describe '::formatted_process_tags_k1_v1' do + subject(:formatted_process_tags_k1_v1) { described_class.formatted_process_tags_k1_v1 } + + it { is_expected.to be_a_kind_of(String) } + + it 'returns the same object when called multiple times' do + # Processes are fixed so no need to recompute this on each call + first_call = described_class.formatted_process_tags_k1_v1 + second_call = described_class.formatted_process_tags_k1_v1 + expect(first_call).to equal(second_call) + end + end + describe 'Scenario: Real applications' do context 'when 
running a real Rails application' do it 'detects Rails process information correctly' do From 7dc9184f576202a5a83a593feb7b261e4e063960 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:29:59 -0500 Subject: [PATCH 03/78] Add tests for trace formatter spec to assert that the first span of the payload has the process tag only when the feature is enabled. --- .../configuration/supported_configurations.rb | 1 + .../tracing/transport/trace_formatter.rb | 4 +- .../tracing/transport/trace_formatter_spec.rb | 53 +++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/configuration/supported_configurations.rb b/lib/datadog/core/configuration/supported_configurations.rb index 707787510d1..8dd727b2d4b 100644 --- a/lib/datadog/core/configuration/supported_configurations.rb +++ b/lib/datadog/core/configuration/supported_configurations.rb @@ -43,6 +43,7 @@ module Configuration "DD_ENV" => {version: ["A"]}, "DD_ERROR_TRACKING_HANDLED_ERRORS" => {version: ["A"]}, "DD_ERROR_TRACKING_HANDLED_ERRORS_INCLUDE" => {version: ["A"]}, + "DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED" => {version: ["A"]}, "DD_GIT_COMMIT_SHA" => {version: ["A"]}, "DD_GIT_REPOSITORY_URL" => {version: ["A"]}, "DD_HEALTH_METRICS_ENABLED" => {version: ["A"]}, diff --git a/lib/datadog/tracing/transport/trace_formatter.rb b/lib/datadog/tracing/transport/trace_formatter.rb index 9422dc33765..51f6cfffebe 100644 --- a/lib/datadog/tracing/transport/trace_formatter.rb +++ b/lib/datadog/tracing/transport/trace_formatter.rb @@ -218,10 +218,10 @@ def tag_git_commit_sha! end def tag_process_tags! - return unless trace.experimental_propagate_process_tags_enabled + return unless Datadog.configuration.tracing.experimental_propagate_process_tags_enabled process_tags = Core::Environment::Process.formatted_process_tags_k1_v1 return if process_tags.empty? 
- root_span.set_tag(Core::Environment::Ext::TAG_PROCESS_TAGS, process_tags) + first_span.set_tag(Core::Environment::Ext::TAG_PROCESS_TAGS, process_tags) end private diff --git a/spec/datadog/tracing/transport/trace_formatter_spec.rb b/spec/datadog/tracing/transport/trace_formatter_spec.rb index 3c0bb34cd74..223db489ba7 100644 --- a/spec/datadog/tracing/transport/trace_formatter_spec.rb +++ b/spec/datadog/tracing/transport/trace_formatter_spec.rb @@ -237,6 +237,23 @@ end end + shared_examples 'first span with process tags' do + it do + format! + expect(first_span.meta).to include('_dd.tags.process') + expect(first_span.meta['_dd.tags.process']).to eq(Datadog::Core::Environment::Process.formatted_process_tags_k1_v1) + # TODO figure out if we need an assertion for the value, ie + # `"entrypoint.workdir:app,entrypoint.name:rspec,entrypoint.basedir:usr/local/bundle/bin,entrypoint.type:script,server.type:placeholder"` + end + end + + shared_examples 'first span without process tags' do + it do + format! 
+ expect(first_span.meta).to_not include('_dd.tags.process') + end + end + context 'with no root span' do include_context 'no root span' @@ -284,6 +301,18 @@ include_context 'no git metadata' it_behaves_like 'first span with no git metadata' end + + context 'with process tags enabled' do + before do + allow(Datadog.configuration.tracing).to receive(:experimental_propagate_process_tags_enabled).and_return(true) + end + it_behaves_like 'first span with process tags' + end + + context 'without process tags enabled' do + # default is false + it_behaves_like 'first span without process tags' + end end context 'with missing root span' do @@ -333,6 +362,18 @@ include_context 'no git metadata' it_behaves_like 'first span with no git metadata' end + + context 'with process tags enabled' do + before do + allow(Datadog.configuration.tracing).to receive(:experimental_propagate_process_tags_enabled).and_return(true) + end + it_behaves_like 'first span with process tags' + end + + context 'without process tags enabled' do + # default is false + it_behaves_like 'first span without process tags' + end end context 'with a root span' do @@ -384,6 +425,18 @@ include_context 'no git metadata' it_behaves_like 'first span with no git metadata' end + + context 'with process tags enabled' do + before do + allow(Datadog.configuration.tracing).to receive(:experimental_propagate_process_tags_enabled).and_return(true) + end + it_behaves_like 'first span with process tags' + end + + context 'without process tags enabled' do + # default is false + it_behaves_like 'first span without process tags' + end end end end From cad26a6de1709acb625c13bd084d831daf12fb7b Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:39:16 -0500 Subject: [PATCH 04/78] it turns out you cannot just pin things to rails 7 due to newer ruby versions so this fixes that. 
--- spec/datadog/core/environment/process_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 8649083a294..6585d17fada 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -53,7 +53,7 @@ Dir.mktmpdir do |tmp_dir| Dir.chdir(tmp_dir) do Bundler.with_unbundled_env do - skip('rails gem could not be installed') unless system('gem install rails -v 7.0.0') + skip('rails gem could not be installed') unless system('gem install rails') unless system('rails new test_app --minimal --skip-test --skip-keeps --skip-git --skip-docker') skip('rails new command failed') end From f31440a54b737b01ea6a19fc9006ca51f53a421b Mon Sep 17 00:00:00 2001 From: wantsui Date: Mon, 10 Nov 2025 16:47:15 -0500 Subject: [PATCH 05/78] Update lib/datadog/core/environment/process.rb Co-authored-by: Marco Costa --- lib/datadog/core/environment/process.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 8d9f3ce5605..e2d393b5e1d 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -23,7 +23,7 @@ def entrypoint_name def entrypoint_basedir current_basedir = File.expand_path(File.dirname($0.to_s)) normalized_basedir = current_basedir.tr(File::SEPARATOR, '/') - normalized_basedir.sub(%r{^/}, '') + normalized_basedir.delete_prefix!('/') end def server_type From cfec602c0177f60415042a08c42db94811a5fbbd Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:05:34 -0500 Subject: [PATCH 06/78] fix string and rename formatted_process_tags_k1_v1 to serialized --- lib/datadog/core/configuration/settings.rb | 10 ++++++++++ lib/datadog/core/environment/process.rb | 11 ++++++----- lib/datadog/tracing/configuration/settings.rb | 10 
---------- lib/datadog/tracing/transport/trace_formatter.rb | 4 ++-- spec/datadog/core/environment/process_spec.rb | 10 +++++----- .../datadog/tracing/transport/trace_formatter_spec.rb | 8 ++++---- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index ad3249ccc55..d443ab93088 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -1003,6 +1003,16 @@ def initialize(*_) end end + # Enable experimental process tags propagation. + # + # @default `DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED` environment variable, otherwise `false` + # @return [Boolean] + option :experimental_propagate_process_tags_enabled do |o| + o.env 'DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED' + o.default false + o.type :bool + end + # Tracer specific configuration starting with APM (e.g. DD_APM_TRACING_ENABLED). # @public_api settings :apm do diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index e2d393b5e1d..59d1b211468 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -17,11 +17,11 @@ def entrypoint_type end def entrypoint_name - File.basename($0.to_s) + File.basename($0) end def entrypoint_basedir - current_basedir = File.expand_path(File.dirname($0.to_s)) + current_basedir = File.expand_path(File.dirname($0)) normalized_basedir = current_basedir.tr(File::SEPARATOR, '/') normalized_basedir.delete_prefix!('/') end @@ -30,15 +30,16 @@ def server_type 'placeholder' end - def formatted_process_tags_k1_v1 - return @formatted_process_tags_k1_v1 if defined?(@formatted_process_tags_k1_v1) + # This method returns a key/value part of serialized tags in the format of k1:v1,k2:v2,k3:v3 + def serialized + return @serialized if defined?(@serialized) tags = [] tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{entrypoint_workdir}" if 
entrypoint_workdir tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entrypoint_name}" if entrypoint_name tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{entrypoint_basedir}" if entrypoint_basedir tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{entrypoint_type}" if entrypoint_type tags << "#{Core::Environment::Ext::TAG_SERVER_TYPE}:#{server_type}" if server_type - @formatted_process_tags_k1_v1 = tags.join(',').freeze + @serialized = tags.join(',').freeze end end end diff --git a/lib/datadog/tracing/configuration/settings.rb b/lib/datadog/tracing/configuration/settings.rb index f730d1ed438..82f6eb4a32e 100644 --- a/lib/datadog/tracing/configuration/settings.rb +++ b/lib/datadog/tracing/configuration/settings.rb @@ -203,16 +203,6 @@ def self.extended(base) o.type :bool end - # Enable experimental process tags propagation. - # - # @default `DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED` environment variable, otherwise `false` - # @return [Boolean] - option :experimental_propagate_process_tags_enabled do |o| - o.env Tracing::Configuration::Ext::ENV_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED - o.default false - o.type :bool - end - # Enable 128 bit trace id injected for logging. # # @default `DD_TRACE_128_BIT_TRACEID_LOGGING_ENABLED` environment variable, otherwise `false` diff --git a/lib/datadog/tracing/transport/trace_formatter.rb b/lib/datadog/tracing/transport/trace_formatter.rb index 51f6cfffebe..0d3b41528b1 100644 --- a/lib/datadog/tracing/transport/trace_formatter.rb +++ b/lib/datadog/tracing/transport/trace_formatter.rb @@ -218,8 +218,8 @@ def tag_git_commit_sha! end def tag_process_tags! - return unless Datadog.configuration.tracing.experimental_propagate_process_tags_enabled - process_tags = Core::Environment::Process.formatted_process_tags_k1_v1 + return unless Datadog.configuration.experimental_propagate_process_tags_enabled + process_tags = Core::Environment::Process.serialized return if process_tags.empty? 
first_span.set_tag(Core::Environment::Ext::TAG_PROCESS_TAGS, process_tags) end diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 6585d17fada..aeb0d62cb80 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -34,15 +34,15 @@ it { is_expected.to be_a_kind_of(String) } end - describe '::formatted_process_tags_k1_v1' do - subject(:formatted_process_tags_k1_v1) { described_class.formatted_process_tags_k1_v1 } + describe '::serialized' do + subject(:serialized) { described_class.serialized } it { is_expected.to be_a_kind_of(String) } it 'returns the same object when called multiple times' do # Processes are fixed so no need to recompute this on each call - first_call = described_class.formatted_process_tags_k1_v1 - second_call = described_class.formatted_process_tags_k1_v1 + first_call = described_class.serialized + second_call = described_class.serialized expect(first_call).to equal(second_call) end end @@ -70,7 +70,7 @@ STDERR.puts "entrypoint_name:\#{Datadog::Core::Environment::Process.entrypoint_name}" STDERR.puts "entrypoint_basedir:\#{Datadog::Core::Environment::Process.entrypoint_basedir}" STDERR.puts "server_type:\#{Datadog::Core::Environment::Process.server_type}" - STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.formatted_process_tags_k1_v1}" + STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.serialized}" STDERR.flush Thread.new { sleep 1; Process.kill('TERM', Process.pid)}#{' '} end diff --git a/spec/datadog/tracing/transport/trace_formatter_spec.rb b/spec/datadog/tracing/transport/trace_formatter_spec.rb index 223db489ba7..f48c1a17d58 100644 --- a/spec/datadog/tracing/transport/trace_formatter_spec.rb +++ b/spec/datadog/tracing/transport/trace_formatter_spec.rb @@ -241,7 +241,7 @@ it do format! 
expect(first_span.meta).to include('_dd.tags.process') - expect(first_span.meta['_dd.tags.process']).to eq(Datadog::Core::Environment::Process.formatted_process_tags_k1_v1) + expect(first_span.meta['_dd.tags.process']).to eq(Datadog::Core::Environment::Process.serialized) # TODO figure out if we need an assertion for the value, ie # `"entrypoint.workdir:app,entrypoint.name:rspec,entrypoint.basedir:usr/local/bundle/bin,entrypoint.type:script,server.type:placeholder"` end @@ -304,7 +304,7 @@ context 'with process tags enabled' do before do - allow(Datadog.configuration.tracing).to receive(:experimental_propagate_process_tags_enabled).and_return(true) + allow(Datadog.configuration).to receive(:experimental_propagate_process_tags_enabled).and_return(true) end it_behaves_like 'first span with process tags' end @@ -365,7 +365,7 @@ context 'with process tags enabled' do before do - allow(Datadog.configuration.tracing).to receive(:experimental_propagate_process_tags_enabled).and_return(true) + allow(Datadog.configuration).to receive(:experimental_propagate_process_tags_enabled).and_return(true) end it_behaves_like 'first span with process tags' end @@ -428,7 +428,7 @@ context 'with process tags enabled' do before do - allow(Datadog.configuration.tracing).to receive(:experimental_propagate_process_tags_enabled).and_return(true) + allow(Datadog.configuration).to receive(:experimental_propagate_process_tags_enabled).and_return(true) end it_behaves_like 'first span with process tags' end From 8dae7053be47f76d7685d17eac72f752a987175d Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:14:03 -0500 Subject: [PATCH 07/78] remove unneeded line --- lib/datadog/tracing/transport/trace_formatter.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/datadog/tracing/transport/trace_formatter.rb b/lib/datadog/tracing/transport/trace_formatter.rb index 0d3b41528b1..eb32de4de75 100644 --- 
a/lib/datadog/tracing/transport/trace_formatter.rb +++ b/lib/datadog/tracing/transport/trace_formatter.rb @@ -220,7 +220,6 @@ def tag_git_commit_sha! def tag_process_tags! return unless Datadog.configuration.experimental_propagate_process_tags_enabled process_tags = Core::Environment::Process.serialized - return if process_tags.empty? first_span.set_tag(Core::Environment::Ext::TAG_PROCESS_TAGS, process_tags) end From 055586fb4830398bc46ffc0cce6f0120a9b2b97e Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:16:40 -0500 Subject: [PATCH 08/78] remove server type for now until more research is done --- lib/datadog/core/environment/ext.rb | 1 - lib/datadog/core/environment/process.rb | 5 ----- spec/datadog/core/environment/process_spec.rb | 10 +--------- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/lib/datadog/core/environment/ext.rb b/lib/datadog/core/environment/ext.rb index 6bf062dcba5..141aead240b 100644 --- a/lib/datadog/core/environment/ext.rb +++ b/lib/datadog/core/environment/ext.rb @@ -41,7 +41,6 @@ module Ext TAG_ENTRYPOINT_WORKDIR = "entrypoint.workdir" TAG_ENTRYPOINT_TYPE = "entrypoint.type" TAG_PROCESS_TAGS = "_dd.tags.process" - TAG_SERVER_TYPE = "server.type" TAG_SERVICE = 'service' TAG_VERSION = 'version' diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 59d1b211468..04813c97773 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -26,10 +26,6 @@ def entrypoint_basedir normalized_basedir.delete_prefix!('/') end - def server_type - 'placeholder' - end - # This method returns a key/value part of serialized tags in the format of k1:v1,k2:v2,k3:v3 def serialized return @serialized if defined?(@serialized) @@ -38,7 +34,6 @@ def serialized tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entrypoint_name}" if entrypoint_name tags << 
"#{Core::Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{entrypoint_basedir}" if entrypoint_basedir tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{entrypoint_type}" if entrypoint_type - tags << "#{Core::Environment::Ext::TAG_SERVER_TYPE}:#{server_type}" if server_type @serialized = tags.join(',').freeze end end diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index aeb0d62cb80..3e953069f58 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -28,12 +28,6 @@ it { is_expected.to be_a_kind_of(String) } end - describe '::server_type' do - subject(:server_type) { described_class.server_type } - - it { is_expected.to be_a_kind_of(String) } - end - describe '::serialized' do subject(:serialized) { described_class.serialized } @@ -69,7 +63,6 @@ STDERR.puts "entrypoint_type:\#{Datadog::Core::Environment::Process.entrypoint_type}" STDERR.puts "entrypoint_name:\#{Datadog::Core::Environment::Process.entrypoint_name}" STDERR.puts "entrypoint_basedir:\#{Datadog::Core::Environment::Process.entrypoint_basedir}" - STDERR.puts "server_type:\#{Datadog::Core::Environment::Process.server_type}" STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.serialized}" STDERR.flush Thread.new { sleep 1; Process.kill('TERM', Process.pid)}#{' '} @@ -84,8 +77,7 @@ expect(err).to include('entrypoint_name:rails') basedir_test = tmp_dir.sub(%r{^/}, '') expect(err).to include("entrypoint_basedir:#{basedir_test}/test_app/bin") - expect(err).to include('server_type:placeholder') - expected_tags = "entrypoint.workdir:test_app,entrypoint.name:rails,entrypoint.basedir:#{basedir_test}/test_app/bin,entrypoint.type:script,server.type:placeholder" + expected_tags = "entrypoint.workdir:test_app,entrypoint.name:rails,entrypoint.basedir:#{basedir_test}/test_app/bin,entrypoint.type:script" expect(err).to include("_dd.tags.process:#{expected_tags}") end end From 
cacb5006e34debecab656a2e6951607441c0f798 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 11 Nov 2025 11:24:53 -0500 Subject: [PATCH 09/78] Add new tag normalizer logic following the trace agent. --- lib/datadog/core/environment/process.rb | 16 ++++++-- lib/datadog/core/normalizer.rb | 31 +++++++++++++++ spec/datadog/core/normalizer_spec.rb | 50 +++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 lib/datadog/core/normalizer.rb create mode 100644 spec/datadog/core/normalizer_spec.rb diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 04813c97773..3c2bea13687 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true require_relative 'ext' +require_relative '../normalizer' module Datadog module Core @@ -26,14 +27,21 @@ def entrypoint_basedir normalized_basedir.delete_prefix!('/') end + # Normalize tag key and value using the Datadog Agent's tag normalization logic + def serialized_kv_helper(key, value) + key = Core::Normalizer.normalize(key) + value = Core::Normalizer.normalize(value) + "#{key}:#{value}" + end + # This method returns a key/value part of serialized tags in the format of k1:v1,k2:v2,k3:v3 def serialized return @serialized if defined?(@serialized) tags = [] - tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{entrypoint_workdir}" if entrypoint_workdir - tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entrypoint_name}" if entrypoint_name - tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{entrypoint_basedir}" if entrypoint_basedir - tags << "#{Core::Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{entrypoint_type}" if entrypoint_type + tags << serialized_kv_helper(Core::Environment::Ext::TAG_ENTRYPOINT_WORKDIR, entrypoint_workdir) if entrypoint_workdir + tags << 
serialized_kv_helper(Core::Environment::Ext::TAG_ENTRYPOINT_NAME, entrypoint_name) if entrypoint_name + tags << serialized_kv_helper(Core::Environment::Ext::TAG_ENTRYPOINT_BASEDIR, entrypoint_basedir) if entrypoint_basedir + tags << serialized_kv_helper(Core::Environment::Ext::TAG_ENTRYPOINT_TYPE, entrypoint_type) if entrypoint_type @serialized = tags.join(',').freeze end end diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb new file mode 100644 index 00000000000..82077937309 --- /dev/null +++ b/lib/datadog/core/normalizer.rb @@ -0,0 +1,31 @@ +module Datadog + module Core + module Normalizer + module_function + INVALID_TAG_CHARACTERS = %r{[^a-z0-9_\-:./]}.freeze + + # Based on https://docs.datadoghq.com/getting_started/tagging/#defining-tags + # Currently a reimplementation of the logic in the + # Datadog::Tracing::Metadata::Ext::HTTP::Headers.to_tag method with some additional items + # TODO: Swap out the logic in the Datadog Tracing Metadata headers logic + def self.normalize(original_value) + return "" if original_value.nil? || original_value.to_s.strip.empty? + + # Removes whitespaces + normalized_value = original_value.to_s.strip + # Lower case characters + normalized_value.downcase! 
+ # Invalid characters are replaced with an underscore + normalized_value.gsub!(INVALID_TAG_CHARACTERS, '_') + # Merge consecutive underscores with a single underscore + normalized_value.gsub!(/_+/, '_') + # Remove leading non-letter characters + normalized_value.sub!(/\A[^a-z]+/, "") + # Maximum length is 200 characters + normalized_value = normalized_value[0...200] if normalized_value.length > 200 + + normalized_value + end + end + end +end \ No newline at end of file diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb new file mode 100644 index 00000000000..be2925ad60b --- /dev/null +++ b/spec/datadog/core/normalizer_spec.rb @@ -0,0 +1,50 @@ +require 'spec_helper' +require 'datadog/core/normalizer' + +RSpec.describe Datadog::Core::Normalizer do + describe '.normalize' do + subject(:normalize) { described_class.normalize(input) } + + context 'keeps normal strings the same' do + let(:input) {'regulartag'} + let(:expected_output) {'regulartag'} + it { is_expected.to eq(expected_output) } + end + + context 'truncates long strings' do + let(:input) {'a' * 201} + let(:expected_output) {'a' * 200} + it { is_expected.to eq(expected_output) } + end + + context 'transforms special characters to underscores' do + let(:input) {'a&**!'} + let(:expected_output) {'a_'} + it { is_expected.to eq(expected_output) } + end + + context 'capital letters are lower cased' do + let(:input) {'A'*10} + let(:expected_output) {'a'*10} + it { is_expected.to eq(expected_output) } + end + + context 'removes whitespaces' do + let(:input) {' hi '} + let(:expected_output) {'hi'} + it { is_expected.to eq(expected_output) } + end + + context 'characters must start with a letter' do + let(:input) {'1hi'} + let(:expected_output) {'hi'} + it { is_expected.to eq(expected_output) } + end + + context 'if none of the characters are valid to start the value, the string is empty' do + let(:input) {'111111111'} + let(:expected_output) {''} + it { is_expected.to 
eq(expected_output) } + end + end +end \ No newline at end of file From 7661a3f1cd8a5dbd07e46b46023e370640be5b5d Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 11 Nov 2025 13:38:42 -0500 Subject: [PATCH 10/78] lint fix --- lib/datadog/core/environment/process.rb | 1 + lib/datadog/core/normalizer.rb | 9 ++-- spec/datadog/core/environment/process_spec.rb | 2 +- spec/datadog/core/normalizer_spec.rb | 44 +++++++++---------- 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 3c2bea13687..5326b725bc7 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true + require_relative 'ext' require_relative '../normalizer' diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index 82077937309..702813a28c3 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -1,11 +1,14 @@ +# frozen_string_literal: true + module Datadog module Core module Normalizer module_function + INVALID_TAG_CHARACTERS = %r{[^a-z0-9_\-:./]}.freeze # Based on https://docs.datadoghq.com/getting_started/tagging/#defining-tags - # Currently a reimplementation of the logic in the + # Currently a reimplementation of the logic in the # Datadog::Tracing::Metadata::Ext::HTTP::Headers.to_tag method with some additional items # TODO: Swap out the logic in the Datadog Tracing Metadata headers logic def self.normalize(original_value) @@ -18,7 +21,7 @@ def self.normalize(original_value) # Invalid characters are replaced with an underscore normalized_value.gsub!(INVALID_TAG_CHARACTERS, '_') # Merge consecutive underscores with a single underscore - normalized_value.gsub!(/_+/, '_') + normalized_value.squeeze!('_') # Remove leading non-letter characters normalized_value.sub!(/\A[^a-z]+/, "") # Maximum length is 200 characters @@ -28,4 +31,4 @@ 
def self.normalize(original_value) end end end -end \ No newline at end of file +end diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 3e953069f58..6698af87222 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -65,7 +65,7 @@ STDERR.puts "entrypoint_basedir:\#{Datadog::Core::Environment::Process.entrypoint_basedir}" STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.serialized}" STDERR.flush - Thread.new { sleep 1; Process.kill('TERM', Process.pid)}#{' '} + Thread.new { sleep 1; Process.kill('TERM', Process.pid)}#{" "} end RUBY Bundler.with_unbundled_env do diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb index be2925ad60b..1ab3f784750 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/normalizer_spec.rb @@ -6,45 +6,45 @@ subject(:normalize) { described_class.normalize(input) } context 'keeps normal strings the same' do - let(:input) {'regulartag'} - let(:expected_output) {'regulartag'} - it { is_expected.to eq(expected_output) } + let(:input) { 'regulartag' } + let(:expected_output) { 'regulartag' } + it { is_expected.to eq(expected_output) } end context 'truncates long strings' do - let(:input) {'a' * 201} - let(:expected_output) {'a' * 200} - it { is_expected.to eq(expected_output) } + let(:input) { 'a' * 201 } + let(:expected_output) { 'a' * 200 } + it { is_expected.to eq(expected_output) } end context 'transforms special characters to underscores' do - let(:input) {'a&**!'} - let(:expected_output) {'a_'} - it { is_expected.to eq(expected_output) } + let(:input) { 'a&**!' 
} + let(:expected_output) { 'a_' } + it { is_expected.to eq(expected_output) } end context 'capital letters are lower cased' do - let(:input) {'A'*10} - let(:expected_output) {'a'*10} - it { is_expected.to eq(expected_output) } + let(:input) { 'A' * 10 } + let(:expected_output) { 'a' * 10 } + it { is_expected.to eq(expected_output) } end context 'removes whitespaces' do - let(:input) {' hi '} - let(:expected_output) {'hi'} - it { is_expected.to eq(expected_output) } + let(:input) { ' hi ' } + let(:expected_output) { 'hi' } + it { is_expected.to eq(expected_output) } end context 'characters must start with a letter' do - let(:input) {'1hi'} - let(:expected_output) {'hi'} - it { is_expected.to eq(expected_output) } + let(:input) { '1hi' } + let(:expected_output) { 'hi' } + it { is_expected.to eq(expected_output) } end context 'if none of the characters are valid to start the value, the string is empty' do - let(:input) {'111111111'} - let(:expected_output) {''} - it { is_expected.to eq(expected_output) } + let(:input) { '111111111' } + let(:expected_output) { '' } + it { is_expected.to eq(expected_output) } end end -end \ No newline at end of file +end From 7825940c10cf82e3e911040cbef4180b375ea8a9 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 11 Nov 2025 13:41:11 -0500 Subject: [PATCH 11/78] add missing files from prototype command --- sig/datadog/core/environment/process.rbs | 19 +++++++++++++++++++ sig/datadog/core/normalizer.rbs | 8 ++++++++ 2 files changed, 27 insertions(+) create mode 100644 sig/datadog/core/environment/process.rbs create mode 100644 sig/datadog/core/normalizer.rbs diff --git a/sig/datadog/core/environment/process.rbs b/sig/datadog/core/environment/process.rbs new file mode 100644 index 00000000000..68482bcbf95 --- /dev/null +++ b/sig/datadog/core/environment/process.rbs @@ -0,0 +1,19 @@ +module Datadog + module Core + module Environment + module Process + @serialized: untyped + + def 
self?.entrypoint_workdir: () -> untyped + + def self?.entrypoint_type: () -> untyped + + def self?.entrypoint_name: () -> untyped + + def self?.entrypoint_basedir: () -> untyped + def self?.serialized_kv_helper: (untyped key, untyped value) -> ::String + def self?.serialized: () -> untyped + end + end + end +end diff --git a/sig/datadog/core/normalizer.rbs b/sig/datadog/core/normalizer.rbs new file mode 100644 index 00000000000..4e5d73e5034 --- /dev/null +++ b/sig/datadog/core/normalizer.rbs @@ -0,0 +1,8 @@ +module Datadog + module Core + module Normalizer + INVALID_TAG_CHARACTERS: ::Regexp + def self.normalize: (untyped original_value) -> ("" | untyped) + end + end +end From 5de6efdf9f83b5e5a3aa72d41d90fefbaa062f67 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 11 Nov 2025 13:47:36 -0500 Subject: [PATCH 12/78] Add missing constants to ext rbs file --- sig/datadog/core/environment/ext.rbs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sig/datadog/core/environment/ext.rbs b/sig/datadog/core/environment/ext.rbs index 2d5fe0a9851..4527eca6d21 100644 --- a/sig/datadog/core/environment/ext.rbs +++ b/sig/datadog/core/environment/ext.rbs @@ -37,6 +37,18 @@ module Datadog TAG_SERVICE: String TAG_VERSION: String + + PROCESS_TYPE: String + + TAG_ENTRYPOINT_BASEDIR: String + + TAG_ENTRYPOINT_NAME: String + + TAG_ENTRYPOINT_WORKDIR: String + + TAG_ENTRYPOINT_TYPE: String + + TAG_PROCESS_TAGS: String end end end From f5ca84a413f97a66e29138ef5fc49bc2ddeb18a7 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:23:45 -0500 Subject: [PATCH 13/78] jruby fix for the process spec --- spec/datadog/core/environment/process_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 6698af87222..4e65be52ab8 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ 
b/spec/datadog/core/environment/process_spec.rb @@ -54,6 +54,7 @@ end end File.open("#{tmp_dir}/test_app/Gemfile", 'a') do |file| + file.puts "gem 'jdbc-sqlite3', platform: :jruby" if RUBY_ENGINE == 'jruby' file.puts "gem 'datadog', path: '#{Dir.pwd}', require: false" end File.write("#{tmp_dir}/test_app/config/initializers/process_initializer.rb", <<-RUBY) From 9ad5be512f3203a833d60f87ffae089eeef1f60b Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 11 Nov 2025 15:46:14 -0500 Subject: [PATCH 14/78] remove the active record during rails creation because it caused a jruby conflict with sqlite and it is not needed for this test --- spec/datadog/core/environment/process_spec.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 4e65be52ab8..c160c77bbec 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -48,13 +48,12 @@ Dir.chdir(tmp_dir) do Bundler.with_unbundled_env do skip('rails gem could not be installed') unless system('gem install rails') - unless system('rails new test_app --minimal --skip-test --skip-keeps --skip-git --skip-docker') + unless system('rails new test_app --minimal --skip-active-record --skip-test --skip-keeps --skip-git --skip-docker') skip('rails new command failed') end end end File.open("#{tmp_dir}/test_app/Gemfile", 'a') do |file| - file.puts "gem 'jdbc-sqlite3', platform: :jruby" if RUBY_ENGINE == 'jruby' file.puts "gem 'datadog', path: '#{Dir.pwd}', require: false" end File.write("#{tmp_dir}/test_app/config/initializers/process_initializer.rb", <<-RUBY) From a66e6359764965405b77778a4c8c95c0d8e04309 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Thu, 13 Nov 2025 16:14:21 -0500 Subject: [PATCH 15/78] Bring tag normalization to 1:1 parity with the Trace Agent --- 
lib/datadog/core/normalizer.rb | 33 ++++++----- spec/datadog/core/normalizer_spec.rb | 87 ++++++++++++++-------------- 2 files changed, 62 insertions(+), 58 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index 702813a28c3..6bbc1b1fc8a 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -5,27 +5,30 @@ module Core module Normalizer module_function - INVALID_TAG_CHARACTERS = %r{[^a-z0-9_\-:./]}.freeze + INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]}.freeze + LEADING_INVALID_CHARS = %r{\A[^\p{L}:]+}.freeze + TRAILING_UNDERSCORES = %r{_+\z}.freeze + MAX_CHARACTER_LENGTH = (0...200).freeze - # Based on https://docs.datadoghq.com/getting_started/tagging/#defining-tags - # Currently a reimplementation of the logic in the - # Datadog::Tracing::Metadata::Ext::HTTP::Headers.to_tag method with some additional items - # TODO: Swap out the logic in the Datadog Tracing Metadata headers logic + # Based on https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize.go#L131 + # Specifically: + # - Must be valid UTF-8 + # - Invalid characters are replaced with an underscore + # - Leading non-letter characters are removed but colons are kept + # - Trailing non-letter characters are removed + # - Trailing underscores are removed + # - Consecutive underscores are merged into a single underscore + # - Maximum length is 200 characters def self.normalize(original_value) - return "" if original_value.nil? || original_value.to_s.strip.empty? + normalized_value = original_value.to_s.encode('UTF-8', invalid: :replace, undef: :replace).strip + return "" if normalized_value.empty? - # Removes whitespaces - normalized_value = original_value.to_s.strip - # Lower case characters normalized_value.downcase! 
- # Invalid characters are replaced with an underscore normalized_value.gsub!(INVALID_TAG_CHARACTERS, '_') - # Merge consecutive underscores with a single underscore + normalized_value.sub!(LEADING_INVALID_CHARS, "") + normalized_value.sub!(TRAILING_UNDERSCORES, "") normalized_value.squeeze!('_') - # Remove leading non-letter characters - normalized_value.sub!(/\A[^a-z]+/, "") - # Maximum length is 200 characters - normalized_value = normalized_value[0...200] if normalized_value.length > 200 + normalized_value = normalized_value[MAX_CHARACTER_LENGTH] normalized_value end diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb index 1ab3f784750..dc2fb3563c3 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/normalizer_spec.rb @@ -2,49 +2,50 @@ require 'datadog/core/normalizer' RSpec.describe Datadog::Core::Normalizer do - describe '.normalize' do - subject(:normalize) { described_class.normalize(input) } - - context 'keeps normal strings the same' do - let(:input) { 'regulartag' } - let(:expected_output) { 'regulartag' } - it { is_expected.to eq(expected_output) } - end - - context 'truncates long strings' do - let(:input) { 'a' * 201 } - let(:expected_output) { 'a' * 200 } - it { is_expected.to eq(expected_output) } - end - - context 'transforms special characters to underscores' do - let(:input) { 'a&**!' 
} - let(:expected_output) { 'a_' } - it { is_expected.to eq(expected_output) } - end - - context 'capital letters are lower cased' do - let(:input) { 'A' * 10 } - let(:expected_output) { 'a' * 10 } - it { is_expected.to eq(expected_output) } - end - - context 'removes whitespaces' do - let(:input) { ' hi ' } - let(:expected_output) { 'hi' } - it { is_expected.to eq(expected_output) } - end - - context 'characters must start with a letter' do - let(:input) { '1hi' } - let(:expected_output) { 'hi' } - it { is_expected.to eq(expected_output) } - end - - context 'if none of the characters are valid to start the value, the string is empty' do - let(:input) { '111111111' } - let(:expected_output) { '' } - it { is_expected.to eq(expected_output) } + describe 'Follows the normalization logic from the Trace Agent' do + # Test cases from the Trace Agent for consistency + # https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize_test.go#L17 + test_cases = [ + { in: '#test_starting_hash', out: 'test_starting_hash' }, + { in: 'TestCAPSandSuch', out: 'testcapsandsuch' }, + { in: 'Test Conversion Of Weird !@#$%^&**() Characters', out: 'test_conversion_of_weird_characters' }, + { in: '$#weird_starting', out: 'weird_starting' }, + { in: 'allowed:c0l0ns', out: 'allowed:c0l0ns' }, + { in: '1love', out: 'love' }, + { in: 'ünicöde', out: 'ünicöde' }, + { in: 'ünicöde:metäl', out: 'ünicöde:metäl' }, + { in: 'Data🐨dog🐶 繋がっ⛰てて', out: 'data_dog_繋がっ_てて' }, + { in: ' spaces ', out: 'spaces' }, + { in: ' #hashtag!@#spaces #__<># ', out: 'hashtag_spaces' }, + { in: ':testing', out: ':testing' }, + { in: '_foo', out: 'foo' }, + { in: ':::test', out: ':::test' }, + { in: 'contiguous_____underscores', out: 'contiguous_underscores' }, + { in: 'foo_', out: 'foo' }, + { in: '', out: '' }, + { in: ' ', out: '' }, + { in: 'ok', out: 'ok' }, + { in: 'AlsO:ök', out: 'also:ök' }, + { in: ':still_ok', out: ':still_ok' }, + { in: '___trim', out: 
'trim' }, + { in: '12.:trim@', out: ':trim' }, + { in: '12.:trim@@', out: ':trim' }, + { in: 'fun:ky__tag/1', out: 'fun:ky_tag/1' }, + { in: 'fun:ky@tag/2', out: 'fun:ky_tag/2' }, + { in: 'fun:ky@@@tag/3', out: 'fun:ky_tag/3' }, + { in: 'tag:1/2.3', out: 'tag:1/2.3' }, + { in: '---fun:k####y_ta@#g/1_@@#', out: 'fun:k_y_ta_g/1' }, + { in: 'AlsO:œ#@ö))œk', out: 'also:œ_ö_œk' }, + { in: "test\x99\x8faaa", out: 'test_aaa' }, + { in: "test\x99\x8f", out: 'test' }, + { in: 'a' * 888, out: 'a' * 200 }, + { in: ' regulartag ', out: 'regulartag' }, + ] + + test_cases.each do |test_case| + it "normalizes #{test_case[:in].inspect} to #{test_case[:out].inspect} like the Trace Agent" do + expect(described_class.normalize(test_case[:in])).to eq(test_case[:out]) + end end end end From ec1e930447b7be4ee9eeb7bb9f6608a11ede3bc0 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Thu, 13 Nov 2025 16:27:42 -0500 Subject: [PATCH 16/78] Add changes from code review around comments and add test for the new environment variable. --- lib/datadog/core/configuration/settings.rb | 2 +- lib/datadog/core/environment/ext.rb | 2 +- lib/datadog/core/environment/process.rb | 16 +++++++- .../core/configuration/settings_spec.rb | 39 +++++++++++++++++++ 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index d443ab93088..9a6a6faeebc 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -1003,7 +1003,7 @@ def initialize(*_) end end - # Enable experimental process tags propagation. + # Enable experimental process tags propagation such that payloads like spans contain the process tag. 
# # @default `DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED` environment variable, otherwise `false` # @return [Boolean] diff --git a/lib/datadog/core/environment/ext.rb b/lib/datadog/core/environment/ext.rb index 141aead240b..572fc81c3ad 100644 --- a/lib/datadog/core/environment/ext.rb +++ b/lib/datadog/core/environment/ext.rb @@ -33,7 +33,7 @@ module Ext LANG_INTERPRETER = "#{RUBY_ENGINE}-#{RUBY_PLATFORM}" LANG_PLATFORM = RUBY_PLATFORM LANG_VERSION = RUBY_VERSION - PROCESS_TYPE = 'script' + PROCESS_TYPE = 'script' # Out of the options [jar, script, class, executable], we consider Ruby to always be a script RUBY_ENGINE = ::RUBY_ENGINE # e.g. 'ruby', 'jruby', 'truffleruby' TAG_ENV = 'env' TAG_ENTRYPOINT_BASEDIR = "entrypoint.basedir" diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 5326b725bc7..606fde12bd7 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -6,29 +6,40 @@ module Datadog module Core module Environment - # Retrieves process level information + # Retrieves process level information such that it can be attached to various payloads module Process module_function + # Returns the last segment of the working directory of the process + # @return [String] the last segment of the working directory def entrypoint_workdir File.basename(Dir.pwd) end + # Returns the entrypoint type of the process + # @return [String] the type of the process, which is fixed in Ruby def entrypoint_type Core::Environment::Ext::PROCESS_TYPE end + # Returns the last segment of the base directory of the process + # @return [String] the last segment of base directory of the script def entrypoint_name File.basename($0) end + # Returns the last segment of the base directory of the process + # @return [String] the last segment of the base directory of the script def entrypoint_basedir current_basedir = File.expand_path(File.dirname($0)) normalized_basedir = 
current_basedir.tr(File::SEPARATOR, '/') normalized_basedir.delete_prefix!('/') end - # Normalize tag key and value using the Datadog Agent's tag normalization logic + # Normalize tag key and value using the Trace Agent's tag normalization logic + # @param key [String] the original key + # @param value [String] the original value + # @return [String] normalized key:value pair def serialized_kv_helper(key, value) key = Core::Normalizer.normalize(key) value = Core::Normalizer.normalize(value) @@ -36,6 +47,7 @@ def serialized_kv_helper(key, value) end # This method returns a key/value part of serialized tags in the format of k1:v1,k2:v2,k3:v3 + # @return [String] comma-separated normalized key:value pairs def serialized return @serialized if defined?(@serialized) tags = [] diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index 0e6ea0ae330..c42d7990ee1 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ b/spec/datadog/core/configuration/settings_spec.rb @@ -1337,6 +1337,45 @@ end end + describe '#experimental_propagate_process_tags_enabled' do + subject(:experimental_propagate_process_tags_enabled) { settings.experimental_propagate_process_tags_enabled } + + context "when #{Datadog::Core::Environment::Ext::ENV_VERSION}" do + around do |example| + ClimateControl.modify('DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED' => environment) do + example.run + end + end + + context 'by default' do + let(:environment) { nil } + + it { is_expected.to be false } + end + + context 'when set to true' do + let(:environment) { 'true' } + + it { is_expected.to be true } + end + + context 'when set to false' do + let(:environment) { 'false' } + + it { is_expected.to be false } + end + end + end + + describe '#experimental_propagate_process_tags_enabled=' do + it 'updates the #experimental_propagate_process_tags_enabled setting' do + expect { settings.experimental_propagate_process_tags_enabled = true } + .to 
change { settings.experimental_propagate_process_tags_enabled } + .from(false) + .to(true) + end + end + describe '#time_now_provider=' do subject(:set_time_now_provider) { settings.time_now_provider = time_now_provider } From 22a368073512504de4912eef93af4656253170b8 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 13:44:51 -0500 Subject: [PATCH 17/78] Remove the rails gem install from process_spec --- Matrixfile | 3 +++ Rakefile | 8 +++++++- spec/datadog/core/environment/process_spec.rb | 1 - 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Matrixfile b/Matrixfile index 90c803f3a92..82ee07ac127 100644 --- a/Matrixfile +++ b/Matrixfile @@ -14,6 +14,9 @@ 'core_with_libdatadog_api' => { '' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ 3.5 / ❌ jruby', }, + 'core_with_rails' => { + 'rails8' => '❌ 2.5 / ❌ 2.6 / ❌ 2.7 / ❌ 3.0 / ❌ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ 3.5 / ❌ jruby', + }, 'error_tracking' => { '' => '❌ 2.5 / ❌ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ 3.5 / ❌ jruby', }, diff --git a/Rakefile b/Rakefile index b27e339f8e8..123181ab717 100644 --- a/Rakefile +++ b/Rakefile @@ -85,7 +85,7 @@ namespace :spec do :graphql, :graphql_unified_trace_patcher, :graphql_trace_patcher, :graphql_tracing_patcher, :rails, :railsredis, :railsredis_activesupport, :railsactivejob, :elasticsearch, :http, :redis, :sidekiq, :sinatra, :hanami, :hanami_autoinstrument, - :profiling, :core_with_libdatadog_api, :error_tracking, :open_feature] + :profiling, :core_with_libdatadog_api, :error_tracking, :open_feature, :core_with_rails] desc '' # "Explicitly hiding from `rake -T`" RSpec::Core::RakeTask.new(:main) do |t, args| @@ -232,6 +232,12 @@ namespace :spec do end # rubocop:enable Style/MultilineBlockChain + desc '' # "Explicitly hiding from `rake -T`" + RSpec::Core::RakeTask.new(:core_with_rails) do |t, args| + t.pattern = 'spec/datadog/core/environment/process_spec.rb' + t.rspec_opts = 
args.to_a.join(' ') + end + desc '' # "Explicitly hiding from `rake -T`" RSpec::Core::RakeTask.new(:error_tracking) do |t, args| t.pattern = 'spec/datadog/error_tracking/**/*_spec.rb' diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index c160c77bbec..285240429e4 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -47,7 +47,6 @@ Dir.mktmpdir do |tmp_dir| Dir.chdir(tmp_dir) do Bundler.with_unbundled_env do - skip('rails gem could not be installed') unless system('gem install rails') unless system('rails new test_app --minimal --skip-active-record --skip-test --skip-keeps --skip-git --skip-docker') skip('rails new command failed') end From 578483357d34eb2187ccd3e42067f40a500b1bd0 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 13:52:39 -0500 Subject: [PATCH 18/78] Remove 1 sec delay. --- spec/datadog/core/environment/process_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 285240429e4..d040a19a1af 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -64,7 +64,7 @@ STDERR.puts "entrypoint_basedir:\#{Datadog::Core::Environment::Process.entrypoint_basedir}" STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.serialized}" STDERR.flush - Thread.new { sleep 1; Process.kill('TERM', Process.pid)}#{" "} + Thread.new { Process.kill('TERM', Process.pid) } end RUBY Bundler.with_unbundled_env do From 2b705e34acd3bea73f2ee0bf71f72535065c3c72 Mon Sep 17 00:00:00 2001 From: wantsui Date: Fri, 14 Nov 2025 15:52:25 -0500 Subject: [PATCH 19/78] Update sig/datadog/core/environment/ext.rbs Co-authored-by: Sergey Fedorov --- sig/datadog/core/environment/ext.rbs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 
deletions(-) diff --git a/sig/datadog/core/environment/ext.rbs b/sig/datadog/core/environment/ext.rbs index 4527eca6d21..53032679975 100644 --- a/sig/datadog/core/environment/ext.rbs +++ b/sig/datadog/core/environment/ext.rbs @@ -38,17 +38,17 @@ module Datadog TAG_VERSION: String - PROCESS_TYPE: String + PROCESS_TYPE: ::String - TAG_ENTRYPOINT_BASEDIR: String + TAG_ENTRYPOINT_BASEDIR: ::String - TAG_ENTRYPOINT_NAME: String + TAG_ENTRYPOINT_NAME: ::String - TAG_ENTRYPOINT_WORKDIR: String + TAG_ENTRYPOINT_WORKDIR: ::String - TAG_ENTRYPOINT_TYPE: String + TAG_ENTRYPOINT_TYPE: ::String - TAG_PROCESS_TAGS: String + TAG_PROCESS_TAGS: ::String end end end From e3deb4c46eeaf691dd1197096e883f49165cdaa1 Mon Sep 17 00:00:00 2001 From: wantsui Date: Fri, 14 Nov 2025 15:52:50 -0500 Subject: [PATCH 20/78] Update lib/datadog/tracing/transport/trace_formatter.rb Co-authored-by: Sergey Fedorov --- lib/datadog/tracing/transport/trace_formatter.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/datadog/tracing/transport/trace_formatter.rb b/lib/datadog/tracing/transport/trace_formatter.rb index eb32de4de75..151fcc5ba5f 100644 --- a/lib/datadog/tracing/transport/trace_formatter.rb +++ b/lib/datadog/tracing/transport/trace_formatter.rb @@ -219,8 +219,11 @@ def tag_git_commit_sha! def tag_process_tags! return unless Datadog.configuration.experimental_propagate_process_tags_enabled - process_tags = Core::Environment::Process.serialized - first_span.set_tag(Core::Environment::Ext::TAG_PROCESS_TAGS, process_tags) + + first_span.set_tag( + Core::Environment::Ext::TAG_PROCESS_TAGS, + Core::Environment::Process.serialized + ) end private From 47472592b66fd521629b3edd06e32ee649af9ca7 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 15:54:00 -0500 Subject: [PATCH 21/78] Add improvements for long strings. 
--- lib/datadog/core/normalizer.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index 6bbc1b1fc8a..d78cb28ec6a 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -5,10 +5,10 @@ module Core module Normalizer module_function - INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]}.freeze - LEADING_INVALID_CHARS = %r{\A[^\p{L}:]+}.freeze - TRAILING_UNDERSCORES = %r{_+\z}.freeze - MAX_CHARACTER_LENGTH = (0...200).freeze + INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]} + LEADING_INVALID_CHARS = %r{\A[^\p{L}:]+} + TRAILING_UNDERSCORES = %r{_+\z} + MAX_CHARACTER_LENGTH = 200 # Based on https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize.go#L131 # Specifically: @@ -27,8 +27,8 @@ def self.normalize(original_value) normalized_value.gsub!(INVALID_TAG_CHARACTERS, '_') normalized_value.sub!(LEADING_INVALID_CHARS, "") normalized_value.sub!(TRAILING_UNDERSCORES, "") - normalized_value.squeeze!('_') - normalized_value = normalized_value[MAX_CHARACTER_LENGTH] + normalized_value.squeeze!('_') if normalized_value.include?('__') + normalized_value.slice!(MAX_CHARACTER_LENGTH..-1) if normalized_value.length > MAX_CHARACTER_LENGTH normalized_value end From 41bc6c0e966edee743063bc3de99c8bd9c6ddcc9 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 16:09:44 -0500 Subject: [PATCH 22/78] small improvement to the whitespace removal. 
--- lib/datadog/core/normalizer.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index d78cb28ec6a..e6cd7e6d135 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -20,7 +20,8 @@ module Normalizer # - Consecutive underscores are merged into a single underscore # - Maximum length is 200 characters def self.normalize(original_value) - normalized_value = original_value.to_s.encode('UTF-8', invalid: :replace, undef: :replace).strip + normalized_value = original_value.to_s.encode('UTF-8', invalid: :replace, undef: :replace) + normalized_value.strip! return "" if normalized_value.empty? normalized_value.downcase! From c3605c081b3c6aee17ae92552c26a4aec53d3996 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 16:16:59 -0500 Subject: [PATCH 23/78] Add upper bound to regex to avoid the polynomial regex on uncontrolled data error. --- lib/datadog/core/normalizer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index e6cd7e6d135..4433477736d 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -6,8 +6,8 @@ module Normalizer module_function INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]} - LEADING_INVALID_CHARS = %r{\A[^\p{L}:]+} - TRAILING_UNDERSCORES = %r{_+\z} + LEADING_INVALID_CHARS = %r{\A[^\p{L}:]{1,200}} + TRAILING_UNDERSCORES = %r{_{1,200}\z} MAX_CHARACTER_LENGTH = 200 # Based on https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize.go#L131 From adfa4166e2de424b209884e908cdb227d338b747 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 16:26:30 -0500 Subject: [PATCH 24/78] Change untyped to string. 
--- sig/datadog/core/environment/process.rbs | 14 +++++++------- sig/datadog/core/normalizer.rbs | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sig/datadog/core/environment/process.rbs b/sig/datadog/core/environment/process.rbs index 68482bcbf95..cfc428ef0d3 100644 --- a/sig/datadog/core/environment/process.rbs +++ b/sig/datadog/core/environment/process.rbs @@ -2,17 +2,17 @@ module Datadog module Core module Environment module Process - @serialized: untyped + @serialized: ::String? - def self?.entrypoint_workdir: () -> untyped + def self?.entrypoint_workdir: () -> ::String - def self?.entrypoint_type: () -> untyped + def self?.entrypoint_type: () -> ::String - def self?.entrypoint_name: () -> untyped + def self?.entrypoint_name: () -> ::String - def self?.entrypoint_basedir: () -> untyped - def self?.serialized_kv_helper: (untyped key, untyped value) -> ::String - def self?.serialized: () -> untyped + def self?.entrypoint_basedir: () -> ::String + def self?.serialized_kv_helper: (::String key, ::String value) -> ::String + def self?.serialized: () -> ::String end end end diff --git a/sig/datadog/core/normalizer.rbs b/sig/datadog/core/normalizer.rbs index 4e5d73e5034..fc8daea90a1 100644 --- a/sig/datadog/core/normalizer.rbs +++ b/sig/datadog/core/normalizer.rbs @@ -2,7 +2,7 @@ module Datadog module Core module Normalizer INVALID_TAG_CHARACTERS: ::Regexp - def self.normalize: (untyped original_value) -> ("" | untyped) + def self.normalize: (untyped original_value) -> ::String end end end From 0dff54532d39560f0901e3e474db15e58f3bfeda Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 16:35:54 -0500 Subject: [PATCH 25/78] Use possessive quantifiers in regex instead of limiting the upper bound to 200 characters --- lib/datadog/core/normalizer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index 
4433477736d..1a1fdea4017 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -6,8 +6,8 @@ module Normalizer module_function INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]} - LEADING_INVALID_CHARS = %r{\A[^\p{L}:]{1,200}} - TRAILING_UNDERSCORES = %r{_{1,200}\z} + LEADING_INVALID_CHARS = %r{\A[^\p{L}:]++} + TRAILING_UNDERSCORES = %r{_++\z} MAX_CHARACTER_LENGTH = 200 # Based on https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize.go#L131 From 7d8da40f95fcb97f0e3b942f96de428b50dc0eab Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 16:54:03 -0500 Subject: [PATCH 26/78] Fix types for steep check command --- lib/datadog/core/environment/process.rb | 2 +- sig/datadog/core/environment/process.rbs | 2 +- sig/datadog/core/normalizer.rbs | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 606fde12bd7..3a9fb6da906 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -33,7 +33,7 @@ def entrypoint_name def entrypoint_basedir current_basedir = File.expand_path(File.dirname($0)) normalized_basedir = current_basedir.tr(File::SEPARATOR, '/') - normalized_basedir.delete_prefix!('/') + normalized_basedir.delete_prefix('/') end # Normalize tag key and value using the Trace Agent's tag normalization logic diff --git a/sig/datadog/core/environment/process.rbs b/sig/datadog/core/environment/process.rbs index cfc428ef0d3..fcdfeb8b2b2 100644 --- a/sig/datadog/core/environment/process.rbs +++ b/sig/datadog/core/environment/process.rbs @@ -2,7 +2,7 @@ module Datadog module Core module Environment module Process - @serialized: ::String? 
+ @serialized: ::String def self?.entrypoint_workdir: () -> ::String diff --git a/sig/datadog/core/normalizer.rbs b/sig/datadog/core/normalizer.rbs index fc8daea90a1..5f35707a39d 100644 --- a/sig/datadog/core/normalizer.rbs +++ b/sig/datadog/core/normalizer.rbs @@ -2,6 +2,9 @@ module Datadog module Core module Normalizer INVALID_TAG_CHARACTERS: ::Regexp + LEADING_INVALID_CHARS: ::Regexp + TRAILING_UNDERSCORES: ::Regexp + MAX_CHARACTER_LENGTH: ::Integer def self.normalize: (untyped original_value) -> ::String end end From 31d97964fedc4d2f187f25e484ee87d9b3e2a2be Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 17:00:04 -0500 Subject: [PATCH 27/78] Remove unneeded Core prefix --- lib/datadog/core/environment/process.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 3a9fb6da906..d79998a408d 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -19,7 +19,7 @@ def entrypoint_workdir # Returns the entrypoint type of the process # @return [String] the type of the process, which is fixed in Ruby def entrypoint_type - Core::Environment::Ext::PROCESS_TYPE + Environment::Ext::PROCESS_TYPE end # Returns the last segment of the base directory of the process @@ -41,8 +41,8 @@ def entrypoint_basedir # @param value [String] the original value # @return [String] normalized key:value pair def serialized_kv_helper(key, value) - key = Core::Normalizer.normalize(key) - value = Core::Normalizer.normalize(value) + key = Normalizer.normalize(key) + value = Normalizer.normalize(value) "#{key}:#{value}" end @@ -51,10 +51,10 @@ def serialized_kv_helper(key, value) def serialized return @serialized if defined?(@serialized) tags = [] - tags << serialized_kv_helper(Core::Environment::Ext::TAG_ENTRYPOINT_WORKDIR, entrypoint_workdir) if entrypoint_workdir - tags << 
serialized_kv_helper(Core::Environment::Ext::TAG_ENTRYPOINT_NAME, entrypoint_name) if entrypoint_name - tags << serialized_kv_helper(Core::Environment::Ext::TAG_ENTRYPOINT_BASEDIR, entrypoint_basedir) if entrypoint_basedir - tags << serialized_kv_helper(Core::Environment::Ext::TAG_ENTRYPOINT_TYPE, entrypoint_type) if entrypoint_type + tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_WORKDIR, entrypoint_workdir) if entrypoint_workdir + tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_NAME, entrypoint_name) if entrypoint_name + tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_BASEDIR, entrypoint_basedir) if entrypoint_basedir + tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_TYPE, entrypoint_type) if entrypoint_type @serialized = tags.join(',').freeze end end From 3672a8ae31689f029835d007541d4bbaebca5f35 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 17:07:18 -0500 Subject: [PATCH 28/78] lint fixes --- .../configuration/supported_configurations.rb | 2 +- .../tracing/transport/trace_formatter.rb | 2 +- .../core/configuration/settings_spec.rb | 2 +- spec/datadog/core/normalizer_spec.rb | 68 +++++++++---------- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/lib/datadog/core/configuration/supported_configurations.rb b/lib/datadog/core/configuration/supported_configurations.rb index e1a454da175..d571e9694b9 100644 --- a/lib/datadog/core/configuration/supported_configurations.rb +++ b/lib/datadog/core/configuration/supported_configurations.rb @@ -44,7 +44,7 @@ module Configuration "DD_ERROR_TRACKING_HANDLED_ERRORS" => {version: ["A"]}, "DD_ERROR_TRACKING_HANDLED_ERRORS_INCLUDE" => {version: ["A"]}, "DD_EXPERIMENTAL_FLAGGING_PROVIDER_ENABLED" => {version: ["A"]}, - "DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED" => {version: ["A"]}, + "DD_EXPERIMENTAL_PROPAGATE_PROCESS_TAGS_ENABLED" => {version: ["A"]}, "DD_GIT_COMMIT_SHA" => {version: ["A"]}, 
"DD_GIT_REPOSITORY_URL" => {version: ["A"]}, "DD_HEALTH_METRICS_ENABLED" => {version: ["A"]}, diff --git a/lib/datadog/tracing/transport/trace_formatter.rb b/lib/datadog/tracing/transport/trace_formatter.rb index 151fcc5ba5f..bc139d22c4e 100644 --- a/lib/datadog/tracing/transport/trace_formatter.rb +++ b/lib/datadog/tracing/transport/trace_formatter.rb @@ -219,7 +219,7 @@ def tag_git_commit_sha! def tag_process_tags! return unless Datadog.configuration.experimental_propagate_process_tags_enabled - + first_span.set_tag( Core::Environment::Ext::TAG_PROCESS_TAGS, Core::Environment::Process.serialized diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb index c42d7990ee1..f7e8d6ee46a 100644 --- a/spec/datadog/core/configuration/settings_spec.rb +++ b/spec/datadog/core/configuration/settings_spec.rb @@ -1337,7 +1337,7 @@ end end - describe '#experimental_propagate_process_tags_enabled' do + describe '#experimental_propagate_process_tags_enabled' do subject(:experimental_propagate_process_tags_enabled) { settings.experimental_propagate_process_tags_enabled } context "when #{Datadog::Core::Environment::Ext::ENV_VERSION}" do diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb index dc2fb3563c3..53229fa82d2 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/normalizer_spec.rb @@ -6,40 +6,40 @@ # Test cases from the Trace Agent for consistency # https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize_test.go#L17 test_cases = [ - { in: '#test_starting_hash', out: 'test_starting_hash' }, - { in: 'TestCAPSandSuch', out: 'testcapsandsuch' }, - { in: 'Test Conversion Of Weird !@#$%^&**() Characters', out: 'test_conversion_of_weird_characters' }, - { in: '$#weird_starting', out: 'weird_starting' }, - { in: 'allowed:c0l0ns', out: 'allowed:c0l0ns' }, - { in: '1love', out: 'love' }, - { in: 'ünicöde', out: 
'ünicöde' }, - { in: 'ünicöde:metäl', out: 'ünicöde:metäl' }, - { in: 'Data🐨dog🐶 繋がっ⛰てて', out: 'data_dog_繋がっ_てて' }, - { in: ' spaces ', out: 'spaces' }, - { in: ' #hashtag!@#spaces #__<># ', out: 'hashtag_spaces' }, - { in: ':testing', out: ':testing' }, - { in: '_foo', out: 'foo' }, - { in: ':::test', out: ':::test' }, - { in: 'contiguous_____underscores', out: 'contiguous_underscores' }, - { in: 'foo_', out: 'foo' }, - { in: '', out: '' }, - { in: ' ', out: '' }, - { in: 'ok', out: 'ok' }, - { in: 'AlsO:ök', out: 'also:ök' }, - { in: ':still_ok', out: ':still_ok' }, - { in: '___trim', out: 'trim' }, - { in: '12.:trim@', out: ':trim' }, - { in: '12.:trim@@', out: ':trim' }, - { in: 'fun:ky__tag/1', out: 'fun:ky_tag/1' }, - { in: 'fun:ky@tag/2', out: 'fun:ky_tag/2' }, - { in: 'fun:ky@@@tag/3', out: 'fun:ky_tag/3' }, - { in: 'tag:1/2.3', out: 'tag:1/2.3' }, - { in: '---fun:k####y_ta@#g/1_@@#', out: 'fun:k_y_ta_g/1' }, - { in: 'AlsO:œ#@ö))œk', out: 'also:œ_ö_œk' }, - { in: "test\x99\x8faaa", out: 'test_aaa' }, - { in: "test\x99\x8f", out: 'test' }, - { in: 'a' * 888, out: 'a' * 200 }, - { in: ' regulartag ', out: 'regulartag' }, + {in: '#test_starting_hash', out: 'test_starting_hash'}, + {in: 'TestCAPSandSuch', out: 'testcapsandsuch'}, + {in: 'Test Conversion Of Weird !@#$%^&**() Characters', out: 'test_conversion_of_weird_characters'}, + {in: '$#weird_starting', out: 'weird_starting'}, + {in: 'allowed:c0l0ns', out: 'allowed:c0l0ns'}, + {in: '1love', out: 'love'}, + {in: 'ünicöde', out: 'ünicöde'}, + {in: 'ünicöde:metäl', out: 'ünicöde:metäl'}, + {in: 'Data🐨dog🐶 繋がっ⛰てて', out: 'data_dog_繋がっ_てて'}, + {in: ' spaces ', out: 'spaces'}, + {in: ' #hashtag!@#spaces #__<># ', out: 'hashtag_spaces'}, + {in: ':testing', out: ':testing'}, + {in: '_foo', out: 'foo'}, + {in: ':::test', out: ':::test'}, + {in: 'contiguous_____underscores', out: 'contiguous_underscores'}, + {in: 'foo_', out: 'foo'}, + {in: '', out: ''}, + {in: ' ', out: ''}, + {in: 'ok', out: 'ok'}, + {in: 'AlsO:ök', 
out: 'also:ök'}, + {in: ':still_ok', out: ':still_ok'}, + {in: '___trim', out: 'trim'}, + {in: '12.:trim@', out: ':trim'}, + {in: '12.:trim@@', out: ':trim'}, + {in: 'fun:ky__tag/1', out: 'fun:ky_tag/1'}, + {in: 'fun:ky@tag/2', out: 'fun:ky_tag/2'}, + {in: 'fun:ky@@@tag/3', out: 'fun:ky_tag/3'}, + {in: 'tag:1/2.3', out: 'tag:1/2.3'}, + {in: '---fun:k####y_ta@#g/1_@@#', out: 'fun:k_y_ta_g/1'}, + {in: 'AlsO:œ#@ö))œk', out: 'also:œ_ö_œk'}, + {in: "test\x99\x8faaa", out: 'test_aaa'}, + {in: "test\x99\x8f", out: 'test'}, + {in: 'a' * 888, out: 'a' * 200}, + {in: ' regulartag ', out: 'regulartag'}, ] test_cases.each do |test_case| From 23d976939367c356bb9ce82a2fe28ca14809e674 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 14 Nov 2025 17:14:25 -0500 Subject: [PATCH 29/78] restructure folder lookup so it works on the macos ci tests --- spec/datadog/core/environment/process_spec.rb | 58 ++++++++++--------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index d040a19a1af..2570a0fe989 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -44,6 +44,8 @@ describe 'Scenario: Real applications' do context 'when running a real Rails application' do it 'detects Rails process information correctly' do + project_root_directory = Dir.pwd + Dir.mktmpdir do |tmp_dir| Dir.chdir(tmp_dir) do Bundler.with_unbundled_env do @@ -51,33 +53,35 @@ skip('rails new command failed') end end - end - File.open("#{tmp_dir}/test_app/Gemfile", 'a') do |file| - file.puts "gem 'datadog', path: '#{Dir.pwd}', require: false" - end - File.write("#{tmp_dir}/test_app/config/initializers/process_initializer.rb", <<-RUBY) - Rails.application.config.after_initialize do - require 'datadog/core/environment/process' - STDERR.puts "entrypoint_workdir:\#{Datadog::Core::Environment::Process.entrypoint_workdir}" 
- STDERR.puts "entrypoint_type:\#{Datadog::Core::Environment::Process.entrypoint_type}" - STDERR.puts "entrypoint_name:\#{Datadog::Core::Environment::Process.entrypoint_name}" - STDERR.puts "entrypoint_basedir:\#{Datadog::Core::Environment::Process.entrypoint_basedir}" - STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.serialized}" - STDERR.flush - Thread.new { Process.kill('TERM', Process.pid) } - end - RUBY - Bundler.with_unbundled_env do - Dir.chdir("#{tmp_dir}/test_app") do - _, _, _ = Open3.capture3('bundle install') - _, err, _ = Open3.capture3('bundle exec rails s') - expect(err).to include('entrypoint_workdir:test_app') - expect(err).to include('entrypoint_type:script') - expect(err).to include('entrypoint_name:rails') - basedir_test = tmp_dir.sub(%r{^/}, '') - expect(err).to include("entrypoint_basedir:#{basedir_test}/test_app/bin") - expected_tags = "entrypoint.workdir:test_app,entrypoint.name:rails,entrypoint.basedir:#{basedir_test}/test_app/bin,entrypoint.type:script" - expect(err).to include("_dd.tags.process:#{expected_tags}") + + File.open("test_app/Gemfile", 'a') do |file| + file.puts "gem 'datadog', path: '#{project_root_directory}', require: false" + end + File.write("test_app/config/initializers/process_initializer.rb", <<-RUBY) + Rails.application.config.after_initialize do + require 'datadog/core/environment/process' + STDERR.puts "entrypoint_workdir:\#{Datadog::Core::Environment::Process.entrypoint_workdir}" + STDERR.puts "entrypoint_type:\#{Datadog::Core::Environment::Process.entrypoint_type}" + STDERR.puts "entrypoint_name:\#{Datadog::Core::Environment::Process.entrypoint_name}" + STDERR.puts "entrypoint_basedir:\#{Datadog::Core::Environment::Process.entrypoint_basedir}" + STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.serialized}" + STDERR.flush + Thread.new { Process.kill('TERM', Process.pid) } + end + RUBY + + Bundler.with_unbundled_env do + Dir.chdir("test_app") do + _, _, _ = 
Open3.capture3('bundle install') + _, err, _ = Open3.capture3('bundle exec rails s') + expect(err).to include('entrypoint_workdir:test_app') + expect(err).to include('entrypoint_type:script') + expect(err).to include('entrypoint_name:rails') + basedir_test = tmp_dir.sub(%r{^/}, '') + expect(err).to include("entrypoint_basedir:#{basedir_test}/test_app/bin") + expected_tags = "entrypoint.workdir:test_app,entrypoint.name:rails,entrypoint.basedir:#{basedir_test}/test_app/bin,entrypoint.type:script" + expect(err).to include("_dd.tags.process:#{expected_tags}") + end end end end From 761590663a11565ee3faeba634a6bbb3b3301010 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:44:06 -0500 Subject: [PATCH 30/78] fixes for local mac development. --- spec/datadog/core/environment/process_spec.rb | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 2570a0fe989..9c4357cf6e6 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -49,8 +49,9 @@ Dir.mktmpdir do |tmp_dir| Dir.chdir(tmp_dir) do Bundler.with_unbundled_env do - unless system('rails new test_app --minimal --skip-active-record --skip-test --skip-keeps --skip-git --skip-docker') - skip('rails new command failed') + _, stderr, status = Open3.capture3('rails new test_app --minimal --skip-active-record --skip-test --skip-keeps --skip-git --skip-docker') + unless status.success? 
&& File.exist?("test_app/Gemfile") + skip("rails new failed: #{stderr}") end end @@ -77,10 +78,8 @@ expect(err).to include('entrypoint_workdir:test_app') expect(err).to include('entrypoint_type:script') expect(err).to include('entrypoint_name:rails') - basedir_test = tmp_dir.sub(%r{^/}, '') - expect(err).to include("entrypoint_basedir:#{basedir_test}/test_app/bin") - expected_tags = "entrypoint.workdir:test_app,entrypoint.name:rails,entrypoint.basedir:#{basedir_test}/test_app/bin,entrypoint.type:script" - expect(err).to include("_dd.tags.process:#{expected_tags}") + # Regex accounts for symlink paths on MacOS + expect(err).to match(/entrypoint_basedir:.*\/test_app\/bin/) end end end From d4c6a914a6fbf62cdae1753c4efa73ebde535387 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Mon, 17 Nov 2025 18:04:10 -0500 Subject: [PATCH 31/78] Add missing trace agent test cases. --- spec/datadog/core/normalizer_spec.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb index 53229fa82d2..d25a8cef030 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/normalizer_spec.rb @@ -40,6 +40,14 @@ {in: "test\x99\x8f", out: 'test'}, {in: 'a' * 888, out: 'a' * 200}, {in: ' regulartag ', out: 'regulartag'}, + {in: "\u017Fodd_\u017Fcase\u017F", out: "\u017Fodd_\u017Fcase\u017F"}, + {in: '™Ö™Ö™™Ö™', out: 'ö_ö_ö'}, + {in: "a#{"\ufffd"}", out: 'a'}, + {in: "a#{"\ufffd"}#{"\ufffd"}", out: 'a'}, + {in: "a#{"\ufffd"}#{"\ufffd"}b", out: 'a_b'}, + # Tests are currently failing on the last two + # {in: 'a' + ('🐶' * 799) + 'b', out: 'a'}, + # {in: 'A' + ('0' * 200) + ' ' + ('0' * 11), out: 'a' + ('0' * 200) + '_0'}, ] test_cases.each do |test_case| From 433b250d0f0e36765dfea9a2bd519dce960cfb98 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 18 Nov 2025 10:59:17 -0500 Subject: [PATCH 32/78] Fix lint --- 
spec/datadog/core/normalizer_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb index d25a8cef030..a7b8c3f334d 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/normalizer_spec.rb @@ -42,9 +42,9 @@ {in: ' regulartag ', out: 'regulartag'}, {in: "\u017Fodd_\u017Fcase\u017F", out: "\u017Fodd_\u017Fcase\u017F"}, {in: '™Ö™Ö™™Ö™', out: 'ö_ö_ö'}, - {in: "a#{"\ufffd"}", out: 'a'}, - {in: "a#{"\ufffd"}#{"\ufffd"}", out: 'a'}, - {in: "a#{"\ufffd"}#{"\ufffd"}b", out: 'a_b'}, + {in: "a�", out: 'a'}, + {in: "a��", out: 'a'}, + {in: "a��b", out: 'a_b'}, # Tests are currently failing on the last two # {in: 'a' + ('🐶' * 799) + 'b', out: 'a'}, # {in: 'A' + ('0' * 200) + ' ' + ('0' * 11), out: 'a' + ('0' * 200) + '_0'}, From 47efb900dfbe5d3fac579f176450020a8c89653d Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 18 Nov 2025 13:57:41 -0500 Subject: [PATCH 33/78] Change methods to private. 
Also add comments with examples --- lib/datadog/core/environment/process.rb | 32 +++++++++++++++--------- sig/datadog/core/environment/process.rbs | 6 ++++- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index d79998a408d..a57a081b802 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -10,25 +10,45 @@ module Environment module Process module_function + # This method returns a key/value part of serialized tags in the format of k1:v1,k2:v2,k3:v3 + # @return [String] comma-separated normalized key:value pairs + def serialized + return @serialized if defined?(@serialized) + tags = [] + tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_WORKDIR, entrypoint_workdir) if entrypoint_workdir + tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_NAME, entrypoint_name) if entrypoint_name + tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_BASEDIR, entrypoint_basedir) if entrypoint_basedir + tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_TYPE, entrypoint_type) if entrypoint_type + @serialized = tags.join(',').freeze + end + + private + # Returns the last segment of the working directory of the process + # Example: /app/myapp -> myapp # @return [String] the last segment of the working directory def entrypoint_workdir File.basename(Dir.pwd) end # Returns the entrypoint type of the process + # In Ruby, the entrypoint type is always 'script' # @return [String] the type of the process, which is fixed in Ruby def entrypoint_type Environment::Ext::PROCESS_TYPE end # Returns the last segment of the base directory of the process + # Example 1: /bin/mybin -> mybin + # Example 2: ruby /test/myapp.rb -> myapp # @return [String] the last segment of base directory of the script def entrypoint_name File.basename($0) end # Returns the last segment of the base directory of the process + # Example 1: 
/bin/mybin -> bin + # Example 2: ruby /test/myapp.js -> test # @return [String] the last segment of the base directory of the script def entrypoint_basedir current_basedir = File.expand_path(File.dirname($0)) @@ -45,18 +65,6 @@ def serialized_kv_helper(key, value) value = Normalizer.normalize(value) "#{key}:#{value}" end - - # This method returns a key/value part of serialized tags in the format of k1:v1,k2:v2,k3:v3 - # @return [String] comma-separated normalized key:value pairs - def serialized - return @serialized if defined?(@serialized) - tags = [] - tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_WORKDIR, entrypoint_workdir) if entrypoint_workdir - tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_NAME, entrypoint_name) if entrypoint_name - tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_BASEDIR, entrypoint_basedir) if entrypoint_basedir - tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_TYPE, entrypoint_type) if entrypoint_type - @serialized = tags.join(',').freeze - end end end end diff --git a/sig/datadog/core/environment/process.rbs b/sig/datadog/core/environment/process.rbs index fcdfeb8b2b2..1cfedadbb47 100644 --- a/sig/datadog/core/environment/process.rbs +++ b/sig/datadog/core/environment/process.rbs @@ -4,6 +4,10 @@ module Datadog module Process @serialized: ::String + def self.serialized: () -> ::String + + private + def self?.entrypoint_workdir: () -> ::String def self?.entrypoint_type: () -> ::String @@ -11,8 +15,8 @@ module Datadog def self?.entrypoint_name: () -> ::String def self?.entrypoint_basedir: () -> ::String + def self?.serialized_kv_helper: (::String key, ::String value) -> ::String - def self?.serialized: () -> ::String end end end From a2643a677771e90182985461ad7997561b874528 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 18 Nov 2025 14:34:04 -0500 Subject: [PATCH 34/78] Fix basedir logic and adjust tests (and also fix the private change) 
--- lib/datadog/core/environment/process.rb | 24 +++--------- sig/datadog/core/environment/process.rbs | 12 ------ spec/datadog/core/environment/process_spec.rb | 38 ++----------------- 3 files changed, 10 insertions(+), 64 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index a57a081b802..dc61460e833 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -8,17 +8,17 @@ module Core module Environment # Retrieves process level information such that it can be attached to various payloads module Process - module_function + extend self # This method returns a key/value part of serialized tags in the format of k1:v1,k2:v2,k3:v3 # @return [String] comma-separated normalized key:value pairs def serialized return @serialized if defined?(@serialized) tags = [] - tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_WORKDIR, entrypoint_workdir) if entrypoint_workdir - tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_NAME, entrypoint_name) if entrypoint_name - tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_BASEDIR, entrypoint_basedir) if entrypoint_basedir - tags << serialized_kv_helper(Environment::Ext::TAG_ENTRYPOINT_TYPE, entrypoint_type) if entrypoint_type + tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{Normalizer.normalize(entrypoint_workdir)}" if entrypoint_workdir + tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{Normalizer.normalize(entrypoint_name)}" if entrypoint_name + tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{Normalizer.normalize(entrypoint_basedir)}" if entrypoint_basedir + tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{Normalizer.normalize(entrypoint_type)}" if entrypoint_type @serialized = tags.join(',').freeze end @@ -51,19 +51,7 @@ def entrypoint_name # Example 2: ruby /test/myapp.js -> test # @return [String] the last segment of the base directory of the script def entrypoint_basedir - 
current_basedir = File.expand_path(File.dirname($0)) - normalized_basedir = current_basedir.tr(File::SEPARATOR, '/') - normalized_basedir.delete_prefix('/') - end - - # Normalize tag key and value using the Trace Agent's tag normalization logic - # @param key [String] the original key - # @param value [String] the original value - # @return [String] normalized key:value pair - def serialized_kv_helper(key, value) - key = Normalizer.normalize(key) - value = Normalizer.normalize(value) - "#{key}:#{value}" + File.basename(File.expand_path(File.dirname($0))) end end end diff --git a/sig/datadog/core/environment/process.rbs b/sig/datadog/core/environment/process.rbs index 1cfedadbb47..31daeb8f31c 100644 --- a/sig/datadog/core/environment/process.rbs +++ b/sig/datadog/core/environment/process.rbs @@ -5,18 +5,6 @@ module Datadog @serialized: ::String def self.serialized: () -> ::String - - private - - def self?.entrypoint_workdir: () -> ::String - - def self?.entrypoint_type: () -> ::String - - def self?.entrypoint_name: () -> ::String - - def self?.entrypoint_basedir: () -> ::String - - def self?.serialized_kv_helper: (::String key, ::String value) -> ::String end end end diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 9c4357cf6e6..c9c2ca80f0c 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -3,31 +3,6 @@ require 'open3' RSpec.describe Datadog::Core::Environment::Process do - describe '::entrypoint_workdir' do - subject(:entrypoint_workdir) { described_class.entrypoint_workdir } - - it { is_expected.to be_a_kind_of(String) } - end - - describe '::entrypoint_type' do - subject(:entrypoint_type) { described_class.entrypoint_type } - - it { is_expected.to be_a_kind_of(String) } - it { is_expected.to eq(Datadog::Core::Environment::Ext::PROCESS_TYPE) } - end - - describe '::entrypoint_name' do - subject(:entrypoint_name) { 
described_class.entrypoint_name } - - it { is_expected.to be_a_kind_of(String) } - end - - describe '::entrypoint_basedir' do - subject(:entrypoint_basedir) { described_class.entrypoint_basedir } - - it { is_expected.to be_a_kind_of(String) } - end - describe '::serialized' do subject(:serialized) { described_class.serialized } @@ -61,10 +36,6 @@ File.write("test_app/config/initializers/process_initializer.rb", <<-RUBY) Rails.application.config.after_initialize do require 'datadog/core/environment/process' - STDERR.puts "entrypoint_workdir:\#{Datadog::Core::Environment::Process.entrypoint_workdir}" - STDERR.puts "entrypoint_type:\#{Datadog::Core::Environment::Process.entrypoint_type}" - STDERR.puts "entrypoint_name:\#{Datadog::Core::Environment::Process.entrypoint_name}" - STDERR.puts "entrypoint_basedir:\#{Datadog::Core::Environment::Process.entrypoint_basedir}" STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.serialized}" STDERR.flush Thread.new { Process.kill('TERM', Process.pid) } @@ -75,11 +46,10 @@ Dir.chdir("test_app") do _, _, _ = Open3.capture3('bundle install') _, err, _ = Open3.capture3('bundle exec rails s') - expect(err).to include('entrypoint_workdir:test_app') - expect(err).to include('entrypoint_type:script') - expect(err).to include('entrypoint_name:rails') - # Regex accounts for symlink paths on MacOS - expect(err).to match(/entrypoint_basedir:.*\/test_app\/bin/) + expect(err).to include('entrypoint.workdir:test_app') + expect(err).to include('entrypoint.type:script') + expect(err).to include('entrypoint.name:rails') + expect(err).to include('entrypoint.basedir:bin') end end end From ccd49712f285deb55df4fea9809851185df6e9f4 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 18 Nov 2025 14:42:16 -0500 Subject: [PATCH 35/78] Fix steepcheck error --- sig/datadog/core/environment/process.rbs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git 
a/sig/datadog/core/environment/process.rbs b/sig/datadog/core/environment/process.rbs index 31daeb8f31c..1d1bab90b42 100644 --- a/sig/datadog/core/environment/process.rbs +++ b/sig/datadog/core/environment/process.rbs @@ -4,7 +4,17 @@ module Datadog module Process @serialized: ::String - def self.serialized: () -> ::String + def serialized: () -> ::String + + private + + def entrypoint_workdir: () -> ::String + + def entrypoint_type: () -> ::String + + def entrypoint_name: () -> ::String + + def entrypoint_basedir: () -> ::String end end end From be9587d73c300d21979ebba17d420649ce6ecf95 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 18 Nov 2025 17:32:12 -0500 Subject: [PATCH 36/78] Add in byte logic to handle emojis with early backoff and allow starting digits for tag values. --- lib/datadog/core/environment/process.rb | 8 ++--- lib/datadog/core/normalizer.rb | 46 ++++++++++++++++++++----- sig/datadog/core/normalizer.rbs | 8 +++-- spec/datadog/core/normalizer_spec.rb | 21 ++++++++--- 4 files changed, 64 insertions(+), 19 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index dc61460e833..83cf30a31b6 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -15,10 +15,10 @@ module Process def serialized return @serialized if defined?(@serialized) tags = [] - tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{Normalizer.normalize(entrypoint_workdir)}" if entrypoint_workdir - tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{Normalizer.normalize(entrypoint_name)}" if entrypoint_name - tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{Normalizer.normalize(entrypoint_basedir)}" if entrypoint_basedir - tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{Normalizer.normalize(entrypoint_type)}" if entrypoint_type + tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{Normalizer.normalize(entrypoint_workdir, 
remove_digit_start_char: false)}" if entrypoint_workdir + tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{Normalizer.normalize(entrypoint_name, remove_digit_start_char: false)}" if entrypoint_name + tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{Normalizer.normalize(entrypoint_basedir, remove_digit_start_char: false)}" if entrypoint_basedir + tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{Normalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" if entrypoint_type @serialized = tags.join(',').freeze end diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index 1a1fdea4017..3b13094b1dd 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -6,9 +6,12 @@ module Normalizer module_function INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]} - LEADING_INVALID_CHARS = %r{\A[^\p{L}:]++} - TRAILING_UNDERSCORES = %r{_++\z} + LEADING_INVALID_CHARS_NO_DIGITS = %r{\A[^\p{L}:]++} + LEADING_INVALID_CHARS_WITH_DIGITS = %r{\A[^\p{L}0-9:./\-]++} MAX_CHARACTER_LENGTH = 200 + MAX_BYTE_LENGTH = MAX_CHARACTER_LENGTH * 2 + TRAILING_UNDERSCORES = %r{_++\z} + VALID_ASCII_TAG = %r{\A[a-z:][a-z0-9:./-]*\z} # Based on https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize.go#L131 # Specifically: @@ -19,17 +22,42 @@ module Normalizer # - Trailing underscores are removed # - Consecutive underscores are merged into a single underscore # - Maximum length is 200 characters - def self.normalize(original_value) - normalized_value = original_value.to_s.encode('UTF-8', invalid: :replace, undef: :replace) - normalized_value.strip! - return "" if normalized_value.empty? + # If it's a tag value, allow it to start with a digit + def self.normalize(original_value, remove_digit_start_char: false) + transformed_value = original_value.to_s.encode('UTF-8', invalid: :replace, undef: :replace) + transformed_value.strip! + return "" if transformed_value.empty? 
+ + return transformed_value if transformed_value.size <= MAX_CHARACTER_LENGTH && + transformed_value.match?(VALID_ASCII_TAG) + + if transformed_value.ascii_only? && transformed_value.length <= MAX_CHARACTER_LENGTH + normalized_value = transformed_value + else + byte_position = 0 + character_count = 0 + normalized_value = String.new(encoding: 'UTF-8') + + transformed_value.each_char do |char| + byte_width = char.bytesize + break if byte_position + byte_width > MAX_BYTE_LENGTH + break if character_count >= MAX_CHARACTER_LENGTH + + normalized_value << char + byte_position += byte_width + character_count += 1 + end + end normalized_value.downcase! normalized_value.gsub!(INVALID_TAG_CHARACTERS, '_') - normalized_value.sub!(LEADING_INVALID_CHARS, "") - normalized_value.sub!(TRAILING_UNDERSCORES, "") + + # The Trace Agent allows tag values to start with a number so this logic is here too + leading_invalid_regex = remove_digit_start_char ? LEADING_INVALID_CHARS_NO_DIGITS : LEADING_INVALID_CHARS_WITH_DIGITS + normalized_value.sub!(leading_invalid_regex, "") + normalized_value.squeeze!('_') if normalized_value.include?('__') - normalized_value.slice!(MAX_CHARACTER_LENGTH..-1) if normalized_value.length > MAX_CHARACTER_LENGTH + normalized_value.sub!(TRAILING_UNDERSCORES, "") normalized_value end diff --git a/sig/datadog/core/normalizer.rbs b/sig/datadog/core/normalizer.rbs index 5f35707a39d..0a3397a3f36 100644 --- a/sig/datadog/core/normalizer.rbs +++ b/sig/datadog/core/normalizer.rbs @@ -2,10 +2,14 @@ module Datadog module Core module Normalizer INVALID_TAG_CHARACTERS: ::Regexp - LEADING_INVALID_CHARS: ::Regexp + LEADING_INVALID_CHARS_NO_DIGITS: ::Regexp + LEADING_INVALID_CHARS_WITH_DIGITS: ::Regexp TRAILING_UNDERSCORES: ::Regexp MAX_CHARACTER_LENGTH: ::Integer - def self.normalize: (untyped original_value) -> ::String + MAX_BYTE_LENGTH: ::Integer + VALID_ASCII_TAG: ::Regexp + + def self.normalize: (untyped original_value, ?remove_digit_start_char: bool) -> ::String end end 
end diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb index a7b8c3f334d..6900dd15d22 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/normalizer_spec.rb @@ -2,7 +2,7 @@ require 'datadog/core/normalizer' RSpec.describe Datadog::Core::Normalizer do - describe 'Follows the normalization logic from the Trace Agent' do + describe 'Follows the normalization logic from the Trace Agent for tag keys' do # Test cases from the Trace Agent for consistency # https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize_test.go#L17 test_cases = [ @@ -45,14 +45,27 @@ {in: "a�", out: 'a'}, {in: "a��", out: 'a'}, {in: "a��b", out: 'a_b'}, - # Tests are currently failing on the last two - # {in: 'a' + ('🐶' * 799) + 'b', out: 'a'}, + {in: 'a' + ('🐶' * 799) + 'b', out: 'a'}, + # This test case doesn't work with the current logic because it yields 202 characters # {in: 'A' + ('0' * 200) + ' ' + ('0' * 11), out: 'a' + ('0' * 200) + '_0'}, ] test_cases.each do |test_case| it "normalizes #{test_case[:in].inspect} to #{test_case[:out].inspect} like the Trace Agent" do - expect(described_class.normalize(test_case[:in])).to eq(test_case[:out]) + expect(described_class.normalize(test_case[:in], remove_digit_start_char: true)).to eq(test_case[:out]) + end + end + end + describe 'Follows the normalization logic from the Trace Agent for tag values' do + test_cases = [ + {in: '1test', out: '1test'}, + {in: 'atest', out: 'atest'}, + ] + + test_cases.each do |test_case| + it "normalizes #{test_case[:in].inspect} to #{test_case[:out].inspect} like the Trace Agent" do + # These test cases are from the Trace Agent's default normalize() behavior (tag keys) + expect(described_class.normalize(test_case[:in], remove_digit_start_char: false)).to eq(test_case[:out]) end end end From 6042830ed1953d06d5a59fbe9c61d941d3ea3294 Mon Sep 17 00:00:00 2001 From: wan 
<26727996+wantsui@users.noreply.github.com> Date: Tue, 18 Nov 2025 17:49:14 -0500 Subject: [PATCH 37/78] Move process tags only to the first span and adjust tests --- .../tracing/transport/trace_formatter.rb | 2 +- .../tracing/transport/trace_formatter_spec.rb | 41 ++++++++++++------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/lib/datadog/tracing/transport/trace_formatter.rb b/lib/datadog/tracing/transport/trace_formatter.rb index bc139d22c4e..d645b6879a6 100644 --- a/lib/datadog/tracing/transport/trace_formatter.rb +++ b/lib/datadog/tracing/transport/trace_formatter.rb @@ -61,9 +61,9 @@ def format! tag_sampling_priority! tag_profiling_enabled! tag_apm_tracing_disabled! - tag_process_tags! if first_span + tag_process_tags! tag_git_repository_url! tag_git_commit_sha! end diff --git a/spec/datadog/tracing/transport/trace_formatter_spec.rb b/spec/datadog/tracing/transport/trace_formatter_spec.rb index f48c1a17d58..2a1de61cfec 100644 --- a/spec/datadog/tracing/transport/trace_formatter_spec.rb +++ b/spec/datadog/tracing/transport/trace_formatter_spec.rb @@ -237,20 +237,31 @@ end end - shared_examples 'first span with process tags' do - it do + shared_examples 'spans with process tags' do + it 'the first span has process tags' do + format! + expect(first_span.meta).to include(Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS) + expect(first_span.meta[Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS]).to eq(Datadog::Core::Environment::Process.serialized) + end + it 'does not add process tags to non first spans' do format! 
- expect(first_span.meta).to include('_dd.tags.process') - expect(first_span.meta['_dd.tags.process']).to eq(Datadog::Core::Environment::Process.serialized) - # TODO figure out if we need an assertion for the value, ie - # `"entrypoint.workdir:app,entrypoint.name:rspec,entrypoint.basedir:usr/local/bundle/bin,entrypoint.type:script,server.type:placeholder"` + trace.spans.each_with_index do |span, index| + if index == 0 + expect(span.meta).to include(Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS) + expect(span.meta[Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS]).to eq(Datadog::Core::Environment::Process.serialized) + else + expect(span.meta).to_not include(Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS) + end + end end end - shared_examples 'first span without process tags' do - it do + shared_examples 'spans without process tags' do + it 'does not add process tags to any spans' do format! - expect(first_span.meta).to_not include('_dd.tags.process') + trace.spans.each do |span| + expect(span.meta).to_not include(Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS) + end end end @@ -306,12 +317,12 @@ before do allow(Datadog.configuration).to receive(:experimental_propagate_process_tags_enabled).and_return(true) end - it_behaves_like 'first span with process tags' + it_behaves_like 'spans with process tags' end context 'without process tags enabled' do # default is false - it_behaves_like 'first span without process tags' + it_behaves_like 'spans without process tags' end end @@ -367,12 +378,12 @@ before do allow(Datadog.configuration).to receive(:experimental_propagate_process_tags_enabled).and_return(true) end - it_behaves_like 'first span with process tags' + it_behaves_like 'spans with process tags' end context 'without process tags enabled' do # default is false - it_behaves_like 'first span without process tags' + it_behaves_like 'spans without process tags' end end @@ -430,12 +441,12 @@ before do allow(Datadog.configuration).to 
receive(:experimental_propagate_process_tags_enabled).and_return(true) end - it_behaves_like 'first span with process tags' + it_behaves_like 'spans with process tags' end context 'without process tags enabled' do # default is false - it_behaves_like 'first span without process tags' + it_behaves_like 'spans without process tags' end end end From 4210d74db8b7efe3322523bef65e29859e96c0c0 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:07:36 -0500 Subject: [PATCH 38/78] Add a special character into the test app name to show that it gets normalized --- spec/datadog/core/environment/process_spec.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index c9c2ca80f0c..13a92418d18 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -24,16 +24,16 @@ Dir.mktmpdir do |tmp_dir| Dir.chdir(tmp_dir) do Bundler.with_unbundled_env do - _, stderr, status = Open3.capture3('rails new test_app --minimal --skip-active-record --skip-test --skip-keeps --skip-git --skip-docker') - unless status.success? && File.exist?("test_app/Gemfile") + _, stderr, status = Open3.capture3('rails new test@_app --minimal --skip-active-record --skip-test --skip-keeps --skip-git --skip-docker') + unless status.success? 
&& File.exist?("test@_app/Gemfile") skip("rails new failed: #{stderr}") end end - File.open("test_app/Gemfile", 'a') do |file| + File.open("test@_app/Gemfile", 'a') do |file| file.puts "gem 'datadog', path: '#{project_root_directory}', require: false" end - File.write("test_app/config/initializers/process_initializer.rb", <<-RUBY) + File.write("test@_app/config/initializers/process_initializer.rb", <<-RUBY) Rails.application.config.after_initialize do require 'datadog/core/environment/process' STDERR.puts "_dd.tags.process:\#{Datadog::Core::Environment::Process.serialized}" @@ -43,7 +43,7 @@ RUBY Bundler.with_unbundled_env do - Dir.chdir("test_app") do + Dir.chdir("test@_app") do _, _, _ = Open3.capture3('bundle install') _, err, _ = Open3.capture3('bundle exec rails s') expect(err).to include('entrypoint.workdir:test_app') From f9af94640d7c3b3537a86aa5845f4ab7276e27e0 Mon Sep 17 00:00:00 2001 From: wantsui Date: Wed, 19 Nov 2025 16:47:52 -0500 Subject: [PATCH 39/78] Update lib/datadog/core/normalizer.rb Co-authored-by: Marco Costa --- lib/datadog/core/normalizer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index 3b13094b1dd..d6ad041ee24 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -28,7 +28,7 @@ def self.normalize(original_value, remove_digit_start_char: false) transformed_value.strip! return "" if transformed_value.empty? - return transformed_value if transformed_value.size <= MAX_CHARACTER_LENGTH && + return transformed_value if transformed_value.bytesize <= MAX_CHARACTER_LENGTH && transformed_value.match?(VALID_ASCII_TAG) if transformed_value.ascii_only? 
&& transformed_value.length <= MAX_CHARACTER_LENGTH From 381fbe2168481dd2395f7ca50d823ef513c6cc83 Mon Sep 17 00:00:00 2001 From: wantsui Date: Wed, 19 Nov 2025 16:48:53 -0500 Subject: [PATCH 40/78] Update lib/datadog/core/normalizer.rb Co-authored-by: Marco Costa --- lib/datadog/core/normalizer.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index d6ad041ee24..2b0d1c5c598 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -8,8 +8,8 @@ module Normalizer INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]} LEADING_INVALID_CHARS_NO_DIGITS = %r{\A[^\p{L}:]++} LEADING_INVALID_CHARS_WITH_DIGITS = %r{\A[^\p{L}0-9:./\-]++} - MAX_CHARACTER_LENGTH = 200 - MAX_BYTE_LENGTH = MAX_CHARACTER_LENGTH * 2 + MAX_BYTE_SIZE = 200 + MAX_BYTE_SIZE_BUFFER = MAX_BYTE_SIZE * 2 TRAILING_UNDERSCORES = %r{_++\z} VALID_ASCII_TAG = %r{\A[a-z:][a-z0-9:./-]*\z} From 138dff8d11546de3a7cff6fef510f59090176e7d Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Wed, 19 Nov 2025 16:50:44 -0500 Subject: [PATCH 41/78] Fixes for new constant names --- lib/datadog/core/normalizer.rb | 8 ++++---- sig/datadog/core/normalizer.rbs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index 2b0d1c5c598..ecf051f4fb1 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -28,10 +28,10 @@ def self.normalize(original_value, remove_digit_start_char: false) transformed_value.strip! return "" if transformed_value.empty? - return transformed_value if transformed_value.bytesize <= MAX_CHARACTER_LENGTH && + return transformed_value if transformed_value.bytesize <= MAX_BYTE_SIZE && transformed_value.match?(VALID_ASCII_TAG) - if transformed_value.ascii_only? && transformed_value.length <= MAX_CHARACTER_LENGTH + if transformed_value.ascii_only? 
&& transformed_value.length <= MAX_BYTE_SIZE normalized_value = transformed_value else byte_position = 0 @@ -40,8 +40,8 @@ def self.normalize(original_value, remove_digit_start_char: false) transformed_value.each_char do |char| byte_width = char.bytesize - break if byte_position + byte_width > MAX_BYTE_LENGTH - break if character_count >= MAX_CHARACTER_LENGTH + break if byte_position + byte_width > MAX_BYTE_SIZE + break if character_count >= MAX_BYTE_SIZE normalized_value << char byte_position += byte_width diff --git a/sig/datadog/core/normalizer.rbs b/sig/datadog/core/normalizer.rbs index 0a3397a3f36..899e46f5ce7 100644 --- a/sig/datadog/core/normalizer.rbs +++ b/sig/datadog/core/normalizer.rbs @@ -5,8 +5,8 @@ module Datadog LEADING_INVALID_CHARS_NO_DIGITS: ::Regexp LEADING_INVALID_CHARS_WITH_DIGITS: ::Regexp TRAILING_UNDERSCORES: ::Regexp - MAX_CHARACTER_LENGTH: ::Integer - MAX_BYTE_LENGTH: ::Integer + MAX_BYTE_SIZE: ::Integer + MAX_BYTE_SIZE_BUFFER: ::Integer VALID_ASCII_TAG: ::Regexp def self.normalize: (untyped original_value, ?remove_digit_start_char: bool) -> ::String From 24491532caee884ecd1f21f27e618ed78b599393 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Wed, 19 Nov 2025 17:40:02 -0500 Subject: [PATCH 42/78] Change to byteslice --- lib/datadog/core/normalizer.rb | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index ecf051f4fb1..ec7e879ea2b 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -34,19 +34,7 @@ def self.normalize(original_value, remove_digit_start_char: false) if transformed_value.ascii_only? 
&& transformed_value.length <= MAX_BYTE_SIZE normalized_value = transformed_value else - byte_position = 0 - character_count = 0 - normalized_value = String.new(encoding: 'UTF-8') - - transformed_value.each_char do |char| - byte_width = char.bytesize - break if byte_position + byte_width > MAX_BYTE_SIZE - break if character_count >= MAX_BYTE_SIZE - - normalized_value << char - byte_position += byte_width - character_count += 1 - end + normalized_value = transformed_value.byteslice(0, MAX_BYTE_SIZE).scrub("") end normalized_value.downcase! From 5252259a6f7629fdc8c3fd88c1ef9a47c4bde24e Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Wed, 19 Nov 2025 17:50:16 -0500 Subject: [PATCH 43/78] fix lint. --- lib/datadog/core/normalizer.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index ec7e879ea2b..d5419bc1fed 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -31,10 +31,10 @@ def self.normalize(original_value, remove_digit_start_char: false) return transformed_value if transformed_value.bytesize <= MAX_BYTE_SIZE && transformed_value.match?(VALID_ASCII_TAG) - if transformed_value.ascii_only? && transformed_value.length <= MAX_BYTE_SIZE - normalized_value = transformed_value + normalized_value = if transformed_value.ascii_only? && transformed_value.length <= MAX_BYTE_SIZE + transformed_value else - normalized_value = transformed_value.byteslice(0, MAX_BYTE_SIZE).scrub("") + transformed_value.byteslice(0, MAX_BYTE_SIZE).scrub("") end normalized_value.downcase! 
From 0eaf302ec275790fea2a0056a49887ea4c1b1f2c Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Wed, 19 Nov 2025 17:53:19 -0500 Subject: [PATCH 44/78] remove process_spec from main rake task --- Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index 123181ab717..8d7dd48bbb4 100644 --- a/Rakefile +++ b/Rakefile @@ -91,7 +91,7 @@ namespace :spec do RSpec::Core::RakeTask.new(:main) do |t, args| t.pattern = 'spec/**/*_spec.rb' t.exclude_pattern = 'spec/**/{appsec/integration,contrib,benchmark,redis,auto_instrument,opentelemetry,open_feature,profiling,crashtracking,error_tracking,rubocop,data_streams}/**/*_spec.rb,' \ - ' spec/**/{auto_instrument,opentelemetry,process_discovery,stable_config,ddsketch}_spec.rb,' \ + ' spec/**/{auto_instrument,opentelemetry,process_discovery,stable_config,ddsketch}_spec.rb,spec/datadog/core/environment/process_spec.rb,' \ ' spec/datadog/gem_packaging_spec.rb' t.rspec_opts = args.to_a.join(' ') end From fbfecfe581e279745a9695a378a4648466741a0c Mon Sep 17 00:00:00 2001 From: wantsui Date: Thu, 20 Nov 2025 14:25:17 -0500 Subject: [PATCH 45/78] Update spec/datadog/core/normalizer_spec.rb Co-authored-by: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> --- spec/datadog/core/normalizer_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb index 6900dd15d22..e4fe6100692 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/normalizer_spec.rb @@ -56,6 +56,7 @@ end end end + describe 'Follows the normalization logic from the Trace Agent for tag values' do test_cases = [ {in: '1test', out: '1test'}, From cc2225f6358df5ee70030f535e68f4d6b1001e41 Mon Sep 17 00:00:00 2001 From: wantsui Date: Thu, 20 Nov 2025 14:25:34 -0500 Subject: [PATCH 46/78] Update spec/datadog/tracing/transport/trace_formatter_spec.rb Co-authored-by: Oleg Pudeyev 
<156273877+p-datadog@users.noreply.github.com> --- spec/datadog/tracing/transport/trace_formatter_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/datadog/tracing/transport/trace_formatter_spec.rb b/spec/datadog/tracing/transport/trace_formatter_spec.rb index 2a1de61cfec..2b29ae80566 100644 --- a/spec/datadog/tracing/transport/trace_formatter_spec.rb +++ b/spec/datadog/tracing/transport/trace_formatter_spec.rb @@ -243,6 +243,7 @@ expect(first_span.meta).to include(Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS) expect(first_span.meta[Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS]).to eq(Datadog::Core::Environment::Process.serialized) end + it 'does not add process tags to non first spans' do format! trace.spans.each_with_index do |span, index| From a77e63b8c079a1b183666a368da40a3993577ae9 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Thu, 20 Nov 2025 15:31:50 -0500 Subject: [PATCH 47/78] Remove the unless check and replace with an assertion that the file exists, small fixes, and add comments to the normalizer.rb explaining the expected usage --- lib/datadog/core/normalizer.rb | 10 ++++++++-- sig/datadog/core/normalizer.rbs | 1 - spec/datadog/core/environment/process_spec.rb | 6 ++---- spec/datadog/core/normalizer_spec.rb | 2 +- spec/datadog/tracing/transport/trace_formatter_spec.rb | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/normalizer.rb index d5419bc1fed..4b413099ceb 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/normalizer.rb @@ -3,13 +3,16 @@ module Datadog module Core module Normalizer + # Normalization logic used for tag keys and values that the Trace Agent has for traces + # Useful for ensuring that tag keys and values are normalized consistently + # An use case for now is Process Tags which need to be sent across various intakes (profiling, tracing, etc.)
consistently + module_function INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]} LEADING_INVALID_CHARS_NO_DIGITS = %r{\A[^\p{L}:]++} LEADING_INVALID_CHARS_WITH_DIGITS = %r{\A[^\p{L}0-9:./\-]++} - MAX_BYTE_SIZE = 200 - MAX_BYTE_SIZE_BUFFER = MAX_BYTE_SIZE * 2 + MAX_BYTE_SIZE = 200 # Represents the max tag length TRAILING_UNDERSCORES = %r{_++\z} VALID_ASCII_TAG = %r{\A[a-z:][a-z0-9:./-]*\z} @@ -23,6 +26,9 @@ module Normalizer # - Consecutive underscores are merged into a single underscore # - Maximum length is 200 characters # If it's a tag value, allow it to start with a digit + # @param original_value [String] The original string + # @param remove_digit_start_char [Boolean] - whether to remove the leading digit (currently only used for tag values) + # @return [String] The normalized string def self.normalize(original_value, remove_digit_start_char: false) transformed_value = original_value.to_s.encode('UTF-8', invalid: :replace, undef: :replace) transformed_value.strip! diff --git a/sig/datadog/core/normalizer.rbs b/sig/datadog/core/normalizer.rbs index 899e46f5ce7..63eb43a6de2 100644 --- a/sig/datadog/core/normalizer.rbs +++ b/sig/datadog/core/normalizer.rbs @@ -6,7 +6,6 @@ module Datadog LEADING_INVALID_CHARS_WITH_DIGITS: ::Regexp TRAILING_UNDERSCORES: ::Regexp MAX_BYTE_SIZE: ::Integer - MAX_BYTE_SIZE_BUFFER: ::Integer VALID_ASCII_TAG: ::Regexp def self.normalize: (untyped original_value, ?remove_digit_start_char: bool) -> ::String diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 13a92418d18..b6621398384 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -24,10 +24,8 @@ Dir.mktmpdir do |tmp_dir| Dir.chdir(tmp_dir) do Bundler.with_unbundled_env do - _, stderr, status = Open3.capture3('rails new test@_app --minimal --skip-active-record --skip-test --skip-keeps --skip-git --skip-docker') - unless status.success? 
&& File.exist?("test@_app/Gemfile") - skip("rails new failed: #{stderr}") - end + _, _, _ = Open3.capture3('rails new test@_app --minimal --skip-active-record --skip-test --skip-keeps --skip-git --skip-docker') + expect(File.exist?("test@_app/Gemfile")).to be true end File.open("test@_app/Gemfile", 'a') do |file| diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/normalizer_spec.rb index e4fe6100692..c37bf462c9c 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/normalizer_spec.rb @@ -56,7 +56,7 @@ end end end - + describe 'Follows the normalization logic from the Trace Agent for tag values' do test_cases = [ {in: '1test', out: '1test'}, diff --git a/spec/datadog/tracing/transport/trace_formatter_spec.rb b/spec/datadog/tracing/transport/trace_formatter_spec.rb index 2b29ae80566..4007dca2963 100644 --- a/spec/datadog/tracing/transport/trace_formatter_spec.rb +++ b/spec/datadog/tracing/transport/trace_formatter_spec.rb @@ -243,7 +243,7 @@ expect(first_span.meta).to include(Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS) expect(first_span.meta[Datadog::Core::Environment::Ext::TAG_PROCESS_TAGS]).to eq(Datadog::Core::Environment::Process.serialized) end - + it 'does not add process tags to non first spans' do format! 
trace.spans.each_with_index do |span, index| From 439e81a66c07e6a9e389657aa976dd79cd9c34a5 Mon Sep 17 00:00:00 2001 From: wantsui Date: Thu, 20 Nov 2025 15:37:14 -0500 Subject: [PATCH 48/78] Update spec/datadog/core/environment/process_spec.rb Co-authored-by: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> --- spec/datadog/core/environment/process_spec.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index b6621398384..6d6aa73519a 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -17,6 +17,8 @@ end describe 'Scenario: Real applications' do + skip_unless_integration_testing_enabled + context 'when running a real Rails application' do it 'detects Rails process information correctly' do project_root_directory = Dir.pwd From 9e45ade4454a0565a274cacd7023da5a46f888d1 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Thu, 20 Nov 2025 15:42:10 -0500 Subject: [PATCH 49/78] fix lint --- spec/datadog/core/environment/process_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 6d6aa73519a..df3f55b7336 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -18,7 +18,7 @@ describe 'Scenario: Real applications' do skip_unless_integration_testing_enabled - + context 'when running a real Rails application' do it 'detects Rails process information correctly' do project_root_directory = Dir.pwd From 0ab4fef29598e6ff51b45d0aa7486c4f4c93fb30 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Thu, 20 Nov 2025 16:39:04 -0500 Subject: [PATCH 50/78] Rename Normalizer to TagNormalizer. 
--- lib/datadog/core/environment/process.rb | 10 +++++----- lib/datadog/core/{normalizer.rb => tag_normalizer.rb} | 2 +- .../core/{normalizer.rbs => tag_normalizer.rbs} | 3 ++- .../{normalizer_spec.rb => tag_normalizer_spec.rb} | 4 ++-- 4 files changed, 10 insertions(+), 9 deletions(-) rename lib/datadog/core/{normalizer.rb => tag_normalizer.rb} (99%) rename sig/datadog/core/{normalizer.rbs => tag_normalizer.rbs} (93%) rename spec/datadog/core/{normalizer_spec.rb => tag_normalizer_spec.rb} (97%) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 83cf30a31b6..cd7d850392a 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require_relative 'ext' -require_relative '../normalizer' +require_relative '../tag_normalizer' module Datadog module Core @@ -15,10 +15,10 @@ module Process def serialized return @serialized if defined?(@serialized) tags = [] - tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{Normalizer.normalize(entrypoint_workdir, remove_digit_start_char: false)}" if entrypoint_workdir - tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{Normalizer.normalize(entrypoint_name, remove_digit_start_char: false)}" if entrypoint_name - tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{Normalizer.normalize(entrypoint_basedir, remove_digit_start_char: false)}" if entrypoint_basedir - tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{Normalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" if entrypoint_type + tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{TagNormalizer.normalize(entrypoint_workdir, remove_digit_start_char: false)}" if entrypoint_workdir + tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{TagNormalizer.normalize(entrypoint_name, remove_digit_start_char: false)}" if entrypoint_name + tags << 
"#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{TagNormalizer.normalize(entrypoint_basedir, remove_digit_start_char: false)}" if entrypoint_basedir + tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" if entrypoint_type @serialized = tags.join(',').freeze end diff --git a/lib/datadog/core/normalizer.rb b/lib/datadog/core/tag_normalizer.rb similarity index 99% rename from lib/datadog/core/normalizer.rb rename to lib/datadog/core/tag_normalizer.rb index 4b413099ceb..63bed4b8521 100644 --- a/lib/datadog/core/normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -2,7 +2,7 @@ module Datadog module Core - module Normalizer + module TagNormalizer # Normalization logic used for tag keys and values that the Trace Agent has for traces # Useful for ensuring that tag keys and values are normalized consistently # An use case for now is Process Tags which need to be sent across various intakes (profiling, tracing, etc.) consistently diff --git a/sig/datadog/core/normalizer.rbs b/sig/datadog/core/tag_normalizer.rbs similarity index 93% rename from sig/datadog/core/normalizer.rbs rename to sig/datadog/core/tag_normalizer.rbs index 63eb43a6de2..d293a745e7a 100644 --- a/sig/datadog/core/normalizer.rbs +++ b/sig/datadog/core/tag_normalizer.rbs @@ -1,6 +1,6 @@ module Datadog module Core - module Normalizer + module TagNormalizer INVALID_TAG_CHARACTERS: ::Regexp LEADING_INVALID_CHARS_NO_DIGITS: ::Regexp LEADING_INVALID_CHARS_WITH_DIGITS: ::Regexp @@ -12,3 +12,4 @@ module Datadog end end end + diff --git a/spec/datadog/core/normalizer_spec.rb b/spec/datadog/core/tag_normalizer_spec.rb similarity index 97% rename from spec/datadog/core/normalizer_spec.rb rename to spec/datadog/core/tag_normalizer_spec.rb index c37bf462c9c..55178e25793 100644 --- a/spec/datadog/core/normalizer_spec.rb +++ b/spec/datadog/core/tag_normalizer_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' -require 'datadog/core/normalizer' +require 
'datadog/core/tag_normalizer' -RSpec.describe Datadog::Core::Normalizer do +RSpec.describe Datadog::Core::TagNormalizer do describe 'Follows the normalization logic from the Trace Agent for tag keys' do # Test cases from the Trace Agent for consistency # https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize_test.go#L17 From 6577d3f4d66833cced628b5447c7aa8040c32d28 Mon Sep 17 00:00:00 2001 From: wantsui Date: Thu, 20 Nov 2025 16:53:55 -0500 Subject: [PATCH 51/78] Update lib/datadog/core/environment/process.rb Co-authored-by: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> --- lib/datadog/core/environment/process.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index cd7d850392a..0c404db96fb 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -7,6 +7,8 @@ module Datadog module Core module Environment # Retrieves process level information such that it can be attached to various payloads + # + # @api private module Process extend self From a336c662ad60bb38fe590d047e42bcbd9dae5da7 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Thu, 20 Nov 2025 17:33:37 -0500 Subject: [PATCH 52/78] Add api private comment to the tag normalizer and refactor away the extend self on process.rb --- lib/datadog/core/environment/process.rb | 16 +++++++--------- lib/datadog/core/tag_normalizer.rb | 1 + 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 0c404db96fb..faa0640dcfb 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -10,11 +10,9 @@ module Environment # # @api private module Process - extend self - # This method returns a key/value part of serialized tags in the format of k1:v1,k2:v2,k3:v3 # 
@return [String] comma-separated normalized key:value pairs - def serialized + def self.serialized return @serialized if defined?(@serialized) tags = [] tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{TagNormalizer.normalize(entrypoint_workdir, remove_digit_start_char: false)}" if entrypoint_workdir @@ -24,19 +22,17 @@ def serialized @serialized = tags.join(',').freeze end - private - # Returns the last segment of the working directory of the process # Example: /app/myapp -> myapp # @return [String] the last segment of the working directory - def entrypoint_workdir + def self.entrypoint_workdir File.basename(Dir.pwd) end # Returns the entrypoint type of the process # In Ruby, the entrypoint type is always 'script' # @return [String] the type of the process, which is fixed in Ruby - def entrypoint_type + def self.entrypoint_type Environment::Ext::PROCESS_TYPE end @@ -44,7 +40,7 @@ def entrypoint_type # Example 1: /bin/mybin -> mybin # Example 2: ruby /test/myapp.rb -> myapp # @return [String] the last segment of base directory of the script - def entrypoint_name + def self.entrypoint_name File.basename($0) end @@ -52,9 +48,11 @@ def entrypoint_name # Example 1: /bin/mybin -> bin # Example 2: ruby /test/myapp.js -> test # @return [String] the last segment of the base directory of the script - def entrypoint_basedir + def self.entrypoint_basedir File.basename(File.expand_path(File.dirname($0))) end + + private_class_method :entrypoint_workdir, :entrypoint_type, :entrypoint_name, :entrypoint_basedir end end end diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index 63bed4b8521..c6cee933bac 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -2,6 +2,7 @@ module Datadog module Core + # @api private module TagNormalizer # Normalization logic used for tag keys and values that the Trace Agent has for traces # Useful for ensuring that tag keys and values are normalized consistently From 
0d229dee9bcccc00d1f28512a320cc0f2c6408ad Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Thu, 20 Nov 2025 17:56:13 -0500 Subject: [PATCH 53/78] Fix steep errors on the process rbs file --- sig/datadog/core/environment/process.rbs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sig/datadog/core/environment/process.rbs b/sig/datadog/core/environment/process.rbs index 1d1bab90b42..828ca6fe163 100644 --- a/sig/datadog/core/environment/process.rbs +++ b/sig/datadog/core/environment/process.rbs @@ -4,17 +4,17 @@ module Datadog module Process @serialized: ::String - def serialized: () -> ::String + def self.serialized: () -> ::String private - def entrypoint_workdir: () -> ::String + def self.entrypoint_workdir: () -> ::String - def entrypoint_type: () -> ::String + def self.entrypoint_type: () -> ::String - def entrypoint_name: () -> ::String + def self.entrypoint_name: () -> ::String - def entrypoint_basedir: () -> ::String + def self.entrypoint_basedir: () -> ::String end end end From e83bc4a9e498debdbc2c6d1dba3d1a5e7863d63a Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Thu, 20 Nov 2025 19:04:18 -0500 Subject: [PATCH 54/78] Refactor the utils encode call so it can be used in the tag normalizer and update tests to show some new assertions --- lib/datadog/core/tag_normalizer.rb | 2 +- lib/datadog/core/utils.rb | 7 +++- sig/datadog/core/utils.rbs | 2 +- spec/datadog/core/utils_spec.rb | 51 ++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index c6cee933bac..48b047d4a67 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -31,7 +31,7 @@ module TagNormalizer # @param remove_digit_start_char [Boolean] - whether to remove the leading digit (currently only used for tag values) # @return [String] The normalized string def 
self.normalize(original_value, remove_digit_start_char: false) - transformed_value = original_value.to_s.encode('UTF-8', invalid: :replace, undef: :replace) + transformed_value = Utils.utf8_encode(original_value, replace_invalid: true) transformed_value.strip! return "" if transformed_value.empty? diff --git a/lib/datadog/core/utils.rb b/lib/datadog/core/utils.rb index c0db770c117..68fe33f13ea 100644 --- a/lib/datadog/core/utils.rb +++ b/lib/datadog/core/utils.rb @@ -42,15 +42,20 @@ def self.truncate(value, size, omission = '...') # @param [String,#to_s] str object to be converted to a UTF-8 string # @param [Boolean] binary whether to expect binary data in the `str` parameter # @param [String] placeholder string to be returned when encoding fails + # @param [Boolean] replace_invalid whether to replace invalid characters (Trace Agent tags expectation) # @return a UTF-8 string version of `str` # @!visibility private - def self.utf8_encode(str, binary: false, placeholder: EMPTY_STRING) + def self.utf8_encode(str, binary: false, replace_invalid: false, placeholder: EMPTY_STRING) str = str.to_s if binary # This option is useful for "gracefully" displaying binary data that # often contains text such as marshalled objects str.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') + elsif replace_invalid + # A non binary mode that replaces invalid characters + # Main use case is to be on par with the Trace Agent's encoding logic for tag normalization + str.encode('UTF-8', invalid: :replace, undef: :replace) elsif str.encoding == ::Encoding::UTF_8 str elsif str.empty? 
diff --git a/sig/datadog/core/utils.rbs b/sig/datadog/core/utils.rbs index dfaa94d8e0c..57c861a7cd0 100644 --- a/sig/datadog/core/utils.rbs +++ b/sig/datadog/core/utils.rbs @@ -5,7 +5,7 @@ module Datadog EMPTY_STRING: untyped def self.truncate: (untyped value, untyped size, ?::String omission) -> untyped - def self.utf8_encode: (untyped str, ?binary: bool, ?placeholder: untyped) -> untyped + def self.utf8_encode: (untyped str, ?binary: bool, ?replace_invalid: bool, ?placeholder: untyped) -> untyped def self.encode_tags: (untyped hash) -> untyped def self.without_warnings: () { () -> untyped } -> untyped diff --git a/spec/datadog/core/utils_spec.rb b/spec/datadog/core/utils_spec.rb index 22faab09483..5dfa3544f0b 100644 --- a/spec/datadog/core/utils_spec.rb +++ b/spec/datadog/core/utils_spec.rb @@ -95,6 +95,57 @@ is_expected.to eq('valid part') end end + + context 'with replace_invalid: true' do + let(:options) { {replace_invalid: true} } + + it 'returns a valid UTF-8 string without raising an error' do + expect { subject }.not_to raise_error + expect(subject.encoding).to eq(Encoding::UTF_8) + end + end + end + + context 'with valid and invalid characters in the string' do + let(:str) { "test\x99\x8faaa".force_encoding(Encoding::ASCII_8BIT) } + + context 'with replace_invalid: true' do + let(:options) { {replace_invalid: true} } + + it 'returns valid UTF-8 string' do + expect(subject).to include('aaa') + expect(subject.encoding).to eq(Encoding::UTF_8) + end + end + + context 'with replace_invalid: false' do + let(:options) { {} } + + it 'returns an empty string' do + is_expected.to eq(Datadog::Core::Utils::EMPTY_STRING) + end + end + end + + context 'with Unicode characters' do + let(:str) { 'ünicöde' } + + context 'with replace_invalid: true' do + let(:options) { {replace_invalid: true} } + + it 'returns a valid UTF-8 string' do + is_expected.to eq(str) + expect(subject.encoding).to eq(Encoding::UTF_8) + end + end + + context 'with replace_invalid: false' do + 
let(:options) { {} } + + it 'preserves the original string' do + is_expected.to eq(str) + end + end end end From ce1759f6cc423e6ee73548d0f463b175d6551d56 Mon Sep 17 00:00:00 2001 From: wantsui Date: Fri, 21 Nov 2025 09:12:28 -0500 Subject: [PATCH 55/78] Update Rakefile Co-authored-by: Sergey Fedorov --- Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index ab5c63e4a2e..a6e95bc8e87 100644 --- a/Rakefile +++ b/Rakefile @@ -92,7 +92,7 @@ namespace :spec do RSpec::Core::RakeTask.new(:main) do |t, args| t.pattern = 'spec/**/*_spec.rb' t.exclude_pattern = 'spec/**/{appsec/integration,contrib,benchmark,redis,auto_instrument,opentelemetry,open_feature,profiling,crashtracking,error_tracking,rubocop,data_streams}/**/*_spec.rb,' \ - ' spec/**/{auto_instrument,opentelemetry,process_discovery,stable_config,ddsketch,open_feature}_spec.rb,spec/datadog/core/environment/process_spec.rb,' \ + ' spec/**/{auto_instrument,opentelemetry,process_discovery,stable_config,ddsketch,open_feature,process}_spec.rb' \ ' spec/datadog/gem_packaging_spec.rb' t.rspec_opts = args.to_a.join(' ') end From 8b978c686af093ae7dc9605542f1f202a1ea13e1 Mon Sep 17 00:00:00 2001 From: wantsui Date: Fri, 21 Nov 2025 09:12:53 -0500 Subject: [PATCH 56/78] Update lib/datadog/core/tag_normalizer.rb Co-authored-by: Sergey Fedorov --- lib/datadog/core/tag_normalizer.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index 48b047d4a67..f362e593ae5 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -38,10 +38,11 @@ def self.normalize(original_value, remove_digit_start_char: false) return transformed_value if transformed_value.bytesize <= MAX_BYTE_SIZE && transformed_value.match?(VALID_ASCII_TAG) - normalized_value = if transformed_value.ascii_only? 
&& transformed_value.length <= MAX_BYTE_SIZE - transformed_value - else - transformed_value.byteslice(0, MAX_BYTE_SIZE).scrub("") + normalized_value = transformed_value + + if normalized_value.ascii_only? && normalized_value.length <= MAX_BYTE_SIZE + normalized_value = normalized_value.byteslice(0, MAX_BYTE_SIZE) + normalized_value.scrub!("") end normalized_value.downcase! From f4c9d4907dbf64c8a2e6d92eeb3b79a0e11bf021 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:53:19 -0500 Subject: [PATCH 57/78] Add lint fixes and remove unneeded regex at the end. --- lib/datadog/core/tag_normalizer.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index f362e593ae5..81d6f5be2dc 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative 'utils' + module Datadog module Core # @api private @@ -14,7 +16,6 @@ module TagNormalizer LEADING_INVALID_CHARS_NO_DIGITS = %r{\A[^\p{L}:]++} LEADING_INVALID_CHARS_WITH_DIGITS = %r{\A[^\p{L}0-9:./\-]++} MAX_BYTE_SIZE = 200 # Represents the max tag length - TRAILING_UNDERSCORES = %r{_++\z} VALID_ASCII_TAG = %r{\A[a-z:][a-z0-9:./-]*\z} # Based on https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize.go#L131 @@ -39,8 +40,8 @@ def self.normalize(original_value, remove_digit_start_char: false) transformed_value.match?(VALID_ASCII_TAG) normalized_value = transformed_value - - if normalized_value.ascii_only? 
&& normalized_value.length <= MAX_BYTE_SIZE + + if normalized_value.bytesize > MAX_BYTE_SIZE normalized_value = normalized_value.byteslice(0, MAX_BYTE_SIZE) normalized_value.scrub!("") end @@ -53,7 +54,7 @@ def self.normalize(original_value, remove_digit_start_char: false) normalized_value.sub!(leading_invalid_regex, "") normalized_value.squeeze!('_') if normalized_value.include?('__') - normalized_value.sub!(TRAILING_UNDERSCORES, "") + normalized_value.delete_suffix!('_') normalized_value end From eae4eb999d8274210382cdd06439cd0bc7090c73 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:54:27 -0500 Subject: [PATCH 58/78] fix rbs file for deleted variable --- sig/datadog/core/tag_normalizer.rbs | 1 - 1 file changed, 1 deletion(-) diff --git a/sig/datadog/core/tag_normalizer.rbs b/sig/datadog/core/tag_normalizer.rbs index d293a745e7a..d13d09ec621 100644 --- a/sig/datadog/core/tag_normalizer.rbs +++ b/sig/datadog/core/tag_normalizer.rbs @@ -4,7 +4,6 @@ module Datadog INVALID_TAG_CHARACTERS: ::Regexp LEADING_INVALID_CHARS_NO_DIGITS: ::Regexp LEADING_INVALID_CHARS_WITH_DIGITS: ::Regexp - TRAILING_UNDERSCORES: ::Regexp MAX_BYTE_SIZE: ::Integer VALID_ASCII_TAG: ::Regexp From f3f14806c1b1838a4b198dd19c4b7ece78801adb Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:59:54 -0500 Subject: [PATCH 59/78] remove unneeded conditional --- lib/datadog/core/environment/process.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index faa0640dcfb..b5998ce3734 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -15,10 +15,10 @@ module Process def self.serialized return @serialized if defined?(@serialized) tags = [] - tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{TagNormalizer.normalize(entrypoint_workdir, 
remove_digit_start_char: false)}" if entrypoint_workdir - tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{TagNormalizer.normalize(entrypoint_name, remove_digit_start_char: false)}" if entrypoint_name - tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{TagNormalizer.normalize(entrypoint_basedir, remove_digit_start_char: false)}" if entrypoint_basedir - tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" if entrypoint_type + tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{TagNormalizer.normalize(entrypoint_workdir, remove_digit_start_char: false)}" + tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{TagNormalizer.normalize(entrypoint_name, remove_digit_start_char: false)}" + tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{TagNormalizer.normalize(entrypoint_basedir, remove_digit_start_char: false)}" + tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" @serialized = tags.join(',').freeze end From b48d20d2e17a5d07f9dbd07a7fc707e2ea192b19 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:10:22 -0500 Subject: [PATCH 60/78] Add a log if the process tags cannot be obtained --- lib/datadog/core/environment/process.rb | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index b5998ce3734..588f2098ab5 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -15,10 +15,15 @@ module Process def self.serialized return @serialized if defined?(@serialized) tags = [] - tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{TagNormalizer.normalize(entrypoint_workdir, remove_digit_start_char: false)}" - tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{TagNormalizer.normalize(entrypoint_name, 
remove_digit_start_char: false)}" - tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{TagNormalizer.normalize(entrypoint_basedir, remove_digit_start_char: false)}" - tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" + + begin + tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{TagNormalizer.normalize(entrypoint_workdir.to_s, remove_digit_start_char: false)}" + tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{TagNormalizer.normalize(entrypoint_name.to_s, remove_digit_start_char: false)}" + tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{TagNormalizer.normalize(entrypoint_basedir.to_s, remove_digit_start_char: false)}" + tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" + rescue => e + Datadog.logger.debug("failed to get process_tags: #{e.message}") + end @serialized = tags.join(',').freeze end From 3d332919b603613a640e76ea16a5962c005e4ff1 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:26:59 -0500 Subject: [PATCH 61/78] Fix regex and reuse the same test cases to show that the leading digits are allowed for tag values --- lib/datadog/core/tag_normalizer.rb | 2 +- spec/datadog/core/tag_normalizer_spec.rb | 43 ++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index 81d6f5be2dc..9d68c0ae5c1 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -14,7 +14,7 @@ module TagNormalizer INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]} LEADING_INVALID_CHARS_NO_DIGITS = %r{\A[^\p{L}:]++} - LEADING_INVALID_CHARS_WITH_DIGITS = %r{\A[^\p{L}0-9:./\-]++} + LEADING_INVALID_CHARS_WITH_DIGITS = %r{\A[^\p{L}0-9:./]++} MAX_BYTE_SIZE = 200 # Represents the max tag length VALID_ASCII_TAG = %r{\A[a-z:][a-z0-9:./-]*\z} diff 
--git a/spec/datadog/core/tag_normalizer_spec.rb b/spec/datadog/core/tag_normalizer_spec.rb index 55178e25793..d31538fab05 100644 --- a/spec/datadog/core/tag_normalizer_spec.rb +++ b/spec/datadog/core/tag_normalizer_spec.rb @@ -59,8 +59,47 @@ describe 'Follows the normalization logic from the Trace Agent for tag values' do test_cases = [ - {in: '1test', out: '1test'}, - {in: 'atest', out: 'atest'}, + # Reusing the same Trace Agent inputs, except a few of the outputs have changed + {in: '#test_starting_hash', out: 'test_starting_hash'}, + {in: 'TestCAPSandSuch', out: 'testcapsandsuch'}, + {in: 'Test Conversion Of Weird !@#$%^&**() Characters', out: 'test_conversion_of_weird_characters'}, + {in: '$#weird_starting', out: 'weird_starting'}, + {in: 'allowed:c0l0ns', out: 'allowed:c0l0ns'}, + {in: '1love', out: '1love'}, # differs when remove_digit_start_char is false + {in: 'ünicöde', out: 'ünicöde'}, + {in: 'ünicöde:metäl', out: 'ünicöde:metäl'}, + {in: 'Data🐨dog🐶 繋がっ⛰てて', out: 'data_dog_繋がっ_てて'}, + {in: ' spaces ', out: 'spaces'}, + {in: ' #hashtag!@#spaces #__<># ', out: 'hashtag_spaces'}, + {in: ':testing', out: ':testing'}, + {in: '_foo', out: 'foo'}, + {in: ':::test', out: ':::test'}, + {in: 'contiguous_____underscores', out: 'contiguous_underscores'}, + {in: 'foo_', out: 'foo'}, + {in: '', out: ''}, + {in: ' ', out: ''}, + {in: 'ok', out: 'ok'}, + {in: 'AlsO:ök', out: 'also:ök'}, + {in: ':still_ok', out: ':still_ok'}, + {in: '___trim', out: 'trim'}, + {in: '12.:trim@', out: '12.:trim'}, # differs when remove_digit_start_char is false + {in: '12.:trim@@', out: '12.:trim'}, # differs when remove_digit_start_char is false + {in: 'fun:ky__tag/1', out: 'fun:ky_tag/1'}, + {in: 'fun:ky@tag/2', out: 'fun:ky_tag/2'}, + {in: 'fun:ky@@@tag/3', out: 'fun:ky_tag/3'}, + {in: 'tag:1/2.3', out: 'tag:1/2.3'}, + {in: '---fun:k####y_ta@#g/1_@@#', out: 'fun:k_y_ta_g/1'}, + {in: 'AlsO:œ#@ö))œk', out: 'also:œ_ö_œk'}, + {in: "test\x99\x8faaa", out: 'test_aaa'}, + {in: "test\x99\x8f", 
out: 'test'}, + {in: 'a' * 888, out: 'a' * 200}, + {in: ' regulartag ', out: 'regulartag'}, + {in: "\u017Fodd_\u017Fcase\u017F", out: "\u017Fodd_\u017Fcase\u017F"}, + {in: '™Ö™Ö™™Ö™', out: 'ö_ö_ö'}, + {in: "a�", out: 'a'}, + {in: "a��", out: 'a'}, + {in: "a��b", out: 'a_b'}, + {in: 'a' + ('🐶' * 799) + 'b', out: 'a'}, ] test_cases.each do |test_case| From 4e3f8f462b1f3667ef8b5b5084ebb6a983b2c369 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:43:51 -0500 Subject: [PATCH 62/78] Attempt to retrieve as many non empty string process tags as possible before setting process tags. --- lib/datadog/core/environment/process.rb | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 588f2098ab5..1e2b9348a84 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -17,9 +17,15 @@ def self.serialized tags = [] begin - tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{TagNormalizer.normalize(entrypoint_workdir.to_s, remove_digit_start_char: false)}" - tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{TagNormalizer.normalize(entrypoint_name.to_s, remove_digit_start_char: false)}" - tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{TagNormalizer.normalize(entrypoint_basedir.to_s, remove_digit_start_char: false)}" + workdir = TagNormalizer.normalize(entrypoint_workdir.to_s, remove_digit_start_char: false) + tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{workdir}" unless workdir.empty? + + entry_name = TagNormalizer.normalize(entrypoint_name.to_s, remove_digit_start_char: false) + tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entry_name}" unless entry_name.empty? + + basedir = TagNormalizer.normalize(entrypoint_basedir.to_s, remove_digit_start_char: false) + tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{basedir}" unless basedir.empty? 
+ tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" rescue => e Datadog.logger.debug("failed to get process_tags: #{e.message}") From 5f6908c7494f5547279a8d8effb5ff69ab1ccd15 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Fri, 21 Nov 2025 19:08:34 -0500 Subject: [PATCH 63/78] Fix hard to spot missing comma in Rakefile that was breaking tests --- Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index a6e95bc8e87..2dab815f4e1 100644 --- a/Rakefile +++ b/Rakefile @@ -92,7 +92,7 @@ namespace :spec do RSpec::Core::RakeTask.new(:main) do |t, args| t.pattern = 'spec/**/*_spec.rb' t.exclude_pattern = 'spec/**/{appsec/integration,contrib,benchmark,redis,auto_instrument,opentelemetry,open_feature,profiling,crashtracking,error_tracking,rubocop,data_streams}/**/*_spec.rb,' \ - ' spec/**/{auto_instrument,opentelemetry,process_discovery,stable_config,ddsketch,open_feature,process}_spec.rb' \ + ' spec/**/{auto_instrument,opentelemetry,process_discovery,stable_config,ddsketch,open_feature,process}_spec.rb,' \ ' spec/datadog/gem_packaging_spec.rb' t.rspec_opts = args.to_a.join(' ') end From 29ea669b499a52435bba541c6837cd458268202e Mon Sep 17 00:00:00 2001 From: wantsui Date: Tue, 25 Nov 2025 17:23:39 -0500 Subject: [PATCH 64/78] Update lib/datadog/core/environment/process.rb Co-authored-by: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> --- lib/datadog/core/environment/process.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 1e2b9348a84..a4d34eddcdb 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -28,7 +28,7 @@ def self.serialized tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: 
false)}" rescue => e - Datadog.logger.debug("failed to get process_tags: #{e.message}") + Datadog.logger.debug("failed to get process_tags: #{e.class}: #{e}") end @serialized = tags.join(',').freeze end From 3b353885360ab52081b4b1d79588d50b52880a45 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 25 Nov 2025 22:13:27 -0500 Subject: [PATCH 65/78] Remove the usage of utils from the tag normalizer until 3.0 due to the requirement to change the default encoding behavior and adjust tag normalizer variables. --- lib/datadog/core/tag_normalizer.rb | 36 ++++++++++++------------- lib/datadog/core/utils.rb | 9 +++---- sig/datadog/core/utils.rbs | 2 +- spec/datadog/core/utils_spec.rb | 43 +++--------------------------- 4 files changed, 26 insertions(+), 64 deletions(-) diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index 9d68c0ae5c1..f2cab0a85f6 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -32,31 +32,31 @@ module TagNormalizer # @param remove_digit_start_char [Boolean] - whether to remove the leading digit (currently only used for tag values) # @return [String] The normalized string def self.normalize(original_value, remove_digit_start_char: false) - transformed_value = Utils.utf8_encode(original_value, replace_invalid: true) - transformed_value.strip! - return "" if transformed_value.empty? - - return transformed_value if transformed_value.bytesize <= MAX_BYTE_SIZE && - transformed_value.match?(VALID_ASCII_TAG) - - normalized_value = transformed_value - - if normalized_value.bytesize > MAX_BYTE_SIZE - normalized_value = normalized_value.byteslice(0, MAX_BYTE_SIZE) - normalized_value.scrub!("") + # DEV-3.0: Ideally this encode call should be replaced with Datadog::Core::Utils.utf8_encode once it + # is safe to modify the default behavior. + value = original_value.to_s.encode('UTF-8', invalid: :replace, undef: :replace) + value.strip! 
+ return "" if value.empty? + + return value if value.bytesize <= MAX_BYTE_SIZE && + value.match?(VALID_ASCII_TAG) + + if value.bytesize > MAX_BYTE_SIZE + value = value.byteslice(0, MAX_BYTE_SIZE) + value.scrub!("") end - normalized_value.downcase! - normalized_value.gsub!(INVALID_TAG_CHARACTERS, '_') + value.downcase! + value.gsub!(INVALID_TAG_CHARACTERS, '_') # The Trace Agent allows tag values to start with a number so this logic is here too leading_invalid_regex = remove_digit_start_char ? LEADING_INVALID_CHARS_NO_DIGITS : LEADING_INVALID_CHARS_WITH_DIGITS - normalized_value.sub!(leading_invalid_regex, "") + value.sub!(leading_invalid_regex, "") - normalized_value.squeeze!('_') if normalized_value.include?('__') - normalized_value.delete_suffix!('_') + value.squeeze!('_') if value.include?('__') + value.delete_suffix!('_') - normalized_value + value end end end diff --git a/lib/datadog/core/utils.rb b/lib/datadog/core/utils.rb index 68fe33f13ea..b5e82637329 100644 --- a/lib/datadog/core/utils.rb +++ b/lib/datadog/core/utils.rb @@ -38,24 +38,21 @@ def self.truncate(value, size, omission = '...') # Ensure `str` is a valid UTF-8, ready to be # sent through the tracer transport. + # DEV-3.0: This method should unconditionally handle invalid byte sequences + # DEV-3.0: and return a safe string to display. 
# # @param [String,#to_s] str object to be converted to a UTF-8 string # @param [Boolean] binary whether to expect binary data in the `str` parameter # @param [String] placeholder string to be returned when encoding fails - # @param [Boolean] replace_invalid whether to replace invalid characters (Trace Agent tags expectation) # @return a UTF-8 string version of `str` # @!visibility private - def self.utf8_encode(str, binary: false, replace_invalid: false, placeholder: EMPTY_STRING) + def self.utf8_encode(str, binary: false, placeholder: EMPTY_STRING) str = str.to_s if binary # This option is useful for "gracefully" displaying binary data that # often contains text such as marshalled objects str.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') - elsif replace_invalid - # A non binary mode that replaces invalid characters - # Main use case is to be on par with the Trace Agent's encoding logic for tag normalization - str.encode('UTF-8', invalid: :replace, undef: :replace) elsif str.encoding == ::Encoding::UTF_8 str elsif str.empty? 
diff --git a/sig/datadog/core/utils.rbs b/sig/datadog/core/utils.rbs index 57c861a7cd0..dfaa94d8e0c 100644 --- a/sig/datadog/core/utils.rbs +++ b/sig/datadog/core/utils.rbs @@ -5,7 +5,7 @@ module Datadog EMPTY_STRING: untyped def self.truncate: (untyped value, untyped size, ?::String omission) -> untyped - def self.utf8_encode: (untyped str, ?binary: bool, ?replace_invalid: bool, ?placeholder: untyped) -> untyped + def self.utf8_encode: (untyped str, ?binary: bool, ?placeholder: untyped) -> untyped def self.encode_tags: (untyped hash) -> untyped def self.without_warnings: () { () -> untyped } -> untyped diff --git a/spec/datadog/core/utils_spec.rb b/spec/datadog/core/utils_spec.rb index 5dfa3544f0b..92ea26a9fbb 100644 --- a/spec/datadog/core/utils_spec.rb +++ b/spec/datadog/core/utils_spec.rb @@ -95,56 +95,21 @@ is_expected.to eq('valid part') end end - - context 'with replace_invalid: true' do - let(:options) { {replace_invalid: true} } - - it 'returns a valid UTF-8 string without raising an error' do - expect { subject }.not_to raise_error - expect(subject.encoding).to eq(Encoding::UTF_8) - end - end end context 'with valid and invalid characters in the string' do let(:str) { "test\x99\x8faaa".force_encoding(Encoding::ASCII_8BIT) } - context 'with replace_invalid: true' do - let(:options) { {replace_invalid: true} } - - it 'returns valid UTF-8 string' do - expect(subject).to include('aaa') - expect(subject.encoding).to eq(Encoding::UTF_8) - end - end - - context 'with replace_invalid: false' do - let(:options) { {} } - - it 'returns an empty string' do - is_expected.to eq(Datadog::Core::Utils::EMPTY_STRING) - end + it 'returns an empty string' do + is_expected.to eq(Datadog::Core::Utils::EMPTY_STRING) end end context 'with Unicode characters' do let(:str) { 'ünicöde' } - context 'with replace_invalid: true' do - let(:options) { {replace_invalid: true} } - - it 'returns a valid UTF-8 string' do - is_expected.to eq(str) - expect(subject.encoding).to 
eq(Encoding::UTF_8) - end - end - - context 'with replace_invalid: false' do - let(:options) { {} } - - it 'preserves the original string' do - is_expected.to eq(str) - end + it 'preserves the original string' do + is_expected.to eq(str) end end end From 856d0e69219098520f7d04d53e5c9f8348340309 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 25 Nov 2025 22:51:23 -0500 Subject: [PATCH 66/78] Add tests to show the values based on different 0 overrides. --- spec/datadog/core/environment/process_spec.rb | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index df3f55b7336..de77a694468 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -6,6 +6,21 @@ describe '::serialized' do subject(:serialized) { described_class.serialized } + def with_program_name(value) + original_0 = $0 + $0 = value + reset_serialized! + + yield + ensure + $0 = original_0 + reset_serialized! + end + + def reset_serialized! 
+ described_class.remove_instance_variable(:@serialized) if described_class.instance_variable_defined?(:@serialized) + end + it { is_expected.to be_a_kind_of(String) } it 'returns the same object when called multiple times' do @@ -14,6 +29,42 @@ second_call = described_class.serialized expect(first_call).to equal(second_call) end + + it 'uses the basedir for /expectedbasedir/executable' do + with_program_name('/expectedbasedir/executable') do + expect(described_class.serialized).to include('entrypoint.workdir:app') + expect(described_class.serialized).to include('entrypoint.name:executable') + expect(described_class.serialized).to include('entrypoint.basedir:expectedbasedir') + expect(described_class.serialized).to include('entrypoint.type:script') + end + end + + it 'uses the basedir for irb' do + with_program_name('irb') do + expect(described_class.serialized).to include('entrypoint.workdir:app') + expect(described_class.serialized).to include('entrypoint.name:irb') + expect(described_class.serialized).to include('entrypoint.basedir:app') + expect(described_class.serialized).to include('entrypoint.type:script') + end + end + + it 'uses the basedir for irb' do + with_program_name('my/path/rubyapp.rb') do + expect(described_class.serialized).to include('entrypoint.workdir:app') + expect(described_class.serialized).to include('entrypoint.name:rubyapp.rb') + expect(described_class.serialized).to include('entrypoint.basedir:path') + expect(described_class.serialized).to include('entrypoint.type:script') + end + end + + it 'uses the basedir for irb' do + with_program_name('bin/rails s') do + expect(described_class.serialized).to include('entrypoint.workdir:app') + expect(described_class.serialized).to include('entrypoint.name:rails_s') + expect(described_class.serialized).to include('entrypoint.basedir:bin') + expect(described_class.serialized).to include('entrypoint.type:script') + end + end end describe 'Scenario: Real applications' do From 
77e4e5163bfc0fc825249dee8def012f90803f17 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 25 Nov 2025 22:58:08 -0500 Subject: [PATCH 67/78] Fix test string. --- spec/datadog/core/environment/process_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index de77a694468..b32c220e05e 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -48,7 +48,7 @@ def reset_serialized! end end - it 'uses the basedir for irb' do + it 'uses the basedir for my/path/rubyapp.rb' do with_program_name('my/path/rubyapp.rb') do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:rubyapp.rb') @@ -57,7 +57,7 @@ def reset_serialized! end end - it 'uses the basedir for irb' do + it 'uses the basedir for bin/rails s' do with_program_name('bin/rails s') do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:rails_s') From 330219a10f6814b85fa1fa782f120aef0719a3af Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 2 Dec 2025 14:38:44 -0500 Subject: [PATCH 68/78] refactor out the rescue and process and update the test --- lib/datadog/core/environment/process.rb | 19 ++++++++----------- spec/datadog/core/environment/process_spec.rb | 15 +++++++++------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index a4d34eddcdb..ed7c5fe6bfd 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -16,20 +16,17 @@ def self.serialized return @serialized if defined?(@serialized) tags = [] - begin - workdir = 
TagNormalizer.normalize(entrypoint_workdir.to_s, remove_digit_start_char: false) - tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{workdir}" unless workdir.empty? + workdir = TagNormalizer.normalize(entrypoint_workdir.to_s, remove_digit_start_char: false) + tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{workdir}" unless workdir.empty? - entry_name = TagNormalizer.normalize(entrypoint_name.to_s, remove_digit_start_char: false) - tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entry_name}" unless entry_name.empty? + entry_name = TagNormalizer.normalize(entrypoint_name.to_s, remove_digit_start_char: false) + tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entry_name}" unless entry_name.empty? - basedir = TagNormalizer.normalize(entrypoint_basedir.to_s, remove_digit_start_char: false) - tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{basedir}" unless basedir.empty? + basedir = TagNormalizer.normalize(entrypoint_basedir.to_s, remove_digit_start_char: false) + tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{basedir}" unless basedir.empty? 
+ + tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" - tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" - rescue => e - Datadog.logger.debug("failed to get process_tags: #{e.class}: #{e}") - end @serialized = tags.join(',').freeze end diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index b32c220e05e..a623d635426 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -6,14 +6,17 @@ describe '::serialized' do subject(:serialized) { described_class.serialized } - def with_program_name(value) + def with_process_env(program_name:, pwd: nil) original_0 = $0 - $0 = value + original_pwd = Dir.pwd + $0 = program_name + allow(Dir).to receive(:pwd).and_return(pwd) if pwd reset_serialized! yield ensure $0 = original_0 + allow(Dir).to receive(:pwd).and_return(original_pwd) if pwd reset_serialized! end @@ -31,7 +34,7 @@ def reset_serialized! end it 'uses the basedir for /expectedbasedir/executable' do - with_program_name('/expectedbasedir/executable') do + with_process_env(program_name: '/expectedbasedir/executable', pwd: '/app') do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:executable') expect(described_class.serialized).to include('entrypoint.basedir:expectedbasedir') @@ -40,7 +43,7 @@ def reset_serialized! end it 'uses the basedir for irb' do - with_program_name('irb') do + with_process_env(program_name: 'irb', pwd: '/app') do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:irb') expect(described_class.serialized).to include('entrypoint.basedir:app') @@ -49,7 +52,7 @@ def reset_serialized! 
end it 'uses the basedir for my/path/rubyapp.rb' do - with_program_name('my/path/rubyapp.rb') do + with_process_env(program_name: 'my/path/rubyapp.rb', pwd: '/app') do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:rubyapp.rb') expect(described_class.serialized).to include('entrypoint.basedir:path') @@ -58,7 +61,7 @@ def reset_serialized! end it 'uses the basedir for bin/rails s' do - with_program_name('bin/rails s') do + with_process_env(program_name: 'bin/rails s', pwd: '/app') do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:rails_s') expect(described_class.serialized).to include('entrypoint.basedir:bin') From 19e3bc201e957df54b63a19729f6bf7f1d70bc4a Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:12:18 -0500 Subject: [PATCH 69/78] Fix mocking. --- spec/datadog/core/environment/process_spec.rb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index a623d635426..780453216ad 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -11,12 +11,15 @@ def with_process_env(program_name:, pwd: nil) original_pwd = Dir.pwd $0 = program_name allow(Dir).to receive(:pwd).and_return(pwd) if pwd + allow(File).to receive(:expand_path).and_call_original + allow(File).to receive(:expand_path).with('.').and_return('/app') reset_serialized! yield ensure $0 = original_0 allow(Dir).to receive(:pwd).and_return(original_pwd) if pwd + RSpec::Mocks.space.proxy_for(File).reset reset_serialized! end @@ -61,9 +64,9 @@ def reset_serialized! 
end it 'uses the basedir for bin/rails s' do - with_process_env(program_name: 'bin/rails s', pwd: '/app') do + with_process_env(program_name: 'bin/rails', pwd: '/app') do expect(described_class.serialized).to include('entrypoint.workdir:app') - expect(described_class.serialized).to include('entrypoint.name:rails_s') + expect(described_class.serialized).to include('entrypoint.name:rails') expect(described_class.serialized).to include('entrypoint.basedir:bin') expect(described_class.serialized).to include('entrypoint.type:script') end From d24d98b3a14fc5e21f8a5b21a5d4db5f2aeb3cd1 Mon Sep 17 00:00:00 2001 From: wantsui Date: Tue, 2 Dec 2025 15:16:05 -0500 Subject: [PATCH 70/78] Update Matrixfile Co-authored-by: Marco Costa --- Matrixfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Matrixfile b/Matrixfile index 82ee07ac127..9c74991984c 100644 --- a/Matrixfile +++ b/Matrixfile @@ -15,7 +15,10 @@ '' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ 3.5 / ❌ jruby', }, 'core_with_rails' => { + # Run with Rails integration 'rails8' => '❌ 2.5 / ❌ 2.6 / ❌ 2.7 / ❌ 3.0 / ❌ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ 3.5 / ❌ jruby', + # Run for all other combinations + '' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ❌ 3.2 / ❌ 3.3 / ❌ 3.4 / ❌ 3.5 / ✅ jruby', }, 'error_tracking' => { '' => '❌ 2.5 / ❌ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ 3.5 / ❌ jruby', From 5c12b5206e06aabdca7de7963c6630c17cafbb98 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:29:08 -0500 Subject: [PATCH 71/78] Do not run tests for all ruby versions. 
--- spec/datadog/core/environment/process_spec.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 780453216ad..29aa680363d 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -77,7 +77,11 @@ def reset_serialized! skip_unless_integration_testing_enabled context 'when running a real Rails application' do - it 'detects Rails process information correctly' do + before do + skip 'Rails 8 is only supported on CRuby' unless PlatformHelpers.mri? + end + + it 'detects Rails process information correctly', ruby: '>= 3.2' do project_root_directory = Dir.pwd Dir.mktmpdir do |tmp_dir| From 1d61402c86cb66618fb682f8f7b11f14812227ad Mon Sep 17 00:00:00 2001 From: wantsui Date: Tue, 2 Dec 2025 16:50:11 -0500 Subject: [PATCH 72/78] Update lib/datadog/core/environment/process.rb Co-authored-by: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> --- lib/datadog/core/environment/process.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index ed7c5fe6bfd..925c5cf6242 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -48,6 +48,12 @@ def self.entrypoint_type # Example 1: /bin/mybin -> mybin # Example 2: ruby /test/myapp.rb -> myapp # @return [String] the last segment of base directory of the script + # + # @note Determining true entrypoint name is rather complicated. This method + # is the initial implementation but it does not produce optimal output in all cases. + # For example, all Rails applications launched via `rails server` get `rails` + # as their entrypoint name. + # We might improve the behavior in the future if there is customer demand for it. 
def self.entrypoint_name File.basename($0) end From cba300e7bae22a7dd3141908eb7403bee5db47a3 Mon Sep 17 00:00:00 2001 From: wantsui Date: Tue, 2 Dec 2025 16:50:24 -0500 Subject: [PATCH 73/78] Update lib/datadog/core/environment/process.rb Co-authored-by: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> --- lib/datadog/core/environment/process.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 925c5cf6242..a9904d7e9a5 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -62,6 +62,12 @@ def self.entrypoint_name # Example 1: /bin/mybin -> bin # Example 2: ruby /test/myapp.js -> test # @return [String] the last segment of the base directory of the script + # + # @note As with entrypoint name, determining true entrypoint directory is complicated. + # This method has an initial implementation that does not necessarily return good + # results in all cases. For example, for Rails applications launched via `rails server` + # the entrypoint basedir is `bin` which is not very helpful. + # We might improve this in the future if there is customer demand. def self.entrypoint_basedir File.basename(File.expand_path(File.dirname($0))) end From 5cae372231a95a00c3e25195f37f0f39b3ba60c7 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Tue, 2 Dec 2025 18:05:31 -0500 Subject: [PATCH 74/78] Add a foo bar test condition with colons and commas, which revealed a need to add the additional process values normalization logic for service names since they can only be 100 characters and cannot have colons. 
--- lib/datadog/core/environment/process.rb | 8 +- lib/datadog/core/tag_normalizer.rb | 26 ++++++- sig/datadog/core/tag_normalizer.rbs | 3 + spec/datadog/core/environment/process_spec.rb | 75 ++++++++++++------- spec/datadog/core/tag_normalizer_spec.rb | 52 +++++++++++++ 5 files changed, 133 insertions(+), 31 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index a9904d7e9a5..5ac07494534 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -16,13 +16,13 @@ def self.serialized return @serialized if defined?(@serialized) tags = [] - workdir = TagNormalizer.normalize(entrypoint_workdir.to_s, remove_digit_start_char: false) + workdir = TagNormalizer.process_values_normalize(entrypoint_workdir.to_s) tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{workdir}" unless workdir.empty? - entry_name = TagNormalizer.normalize(entrypoint_name.to_s, remove_digit_start_char: false) + entry_name = TagNormalizer.process_values_normalize(entrypoint_name.to_s) tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entry_name}" unless entry_name.empty? - basedir = TagNormalizer.normalize(entrypoint_basedir.to_s, remove_digit_start_char: false) + basedir = TagNormalizer.process_values_normalize(entrypoint_basedir.to_s) tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{basedir}" unless basedir.empty? tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" @@ -62,7 +62,7 @@ def self.entrypoint_name # Example 1: /bin/mybin -> bin # Example 2: ruby /test/myapp.js -> test # @return [String] the last segment of the base directory of the script - # + # # @note As with entrypoint name, determining true entrypoint directory is complicated. # This method has an initial implementation that does not necessarily return good # results in all cases. 
For example, for Rails applications launched via `rails server` diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index f2cab0a85f6..bad0b834830 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -15,11 +15,12 @@ module TagNormalizer INVALID_TAG_CHARACTERS = %r{[^\p{L}0-9_\-:./]} LEADING_INVALID_CHARS_NO_DIGITS = %r{\A[^\p{L}:]++} LEADING_INVALID_CHARS_WITH_DIGITS = %r{\A[^\p{L}0-9:./]++} - MAX_BYTE_SIZE = 200 # Represents the max tag length + MAX_BYTE_SIZE = 200 # Represents the general max tag length + MAX_PROCESS_VALUES_BYTE_SIZE = 100 # Represents the max tag length for process tags VALID_ASCII_TAG = %r{\A[a-z:][a-z0-9:./-]*\z} # Based on https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize.go#L131 - # Specifically: + # Specifically for general normalization: # - Must be valid UTF-8 # - Invalid characters are replaced with an underscore # - Leading non-letter characters are removed but colons are kept @@ -58,6 +59,27 @@ def self.normalize(original_value, remove_digit_start_char: false) value end + + # Process tags values follow an additional piece of normalization: + # - must not be more than 100 bytes + # - and must not contain colons + # @param value [String] The original string + # @return [String] The normalized string + def self.process_values_normalize(value) + value = normalize(value) + return value if value.empty? 
+ + value.tr!(':', '_') + + value.squeeze!('_') if value.include?('__') + + if value.bytesize > MAX_PROCESS_VALUES_BYTE_SIZE + value = value.byteslice(0, MAX_PROCESS_VALUES_BYTE_SIZE) || value + value.scrub!("") + end + + value + end end end end diff --git a/sig/datadog/core/tag_normalizer.rbs b/sig/datadog/core/tag_normalizer.rbs index d13d09ec621..8863c9f398e 100644 --- a/sig/datadog/core/tag_normalizer.rbs +++ b/sig/datadog/core/tag_normalizer.rbs @@ -5,9 +5,12 @@ module Datadog LEADING_INVALID_CHARS_NO_DIGITS: ::Regexp LEADING_INVALID_CHARS_WITH_DIGITS: ::Regexp MAX_BYTE_SIZE: ::Integer + MAX_PROCESS_VALUES_BYTE_SIZE: ::Integer VALID_ASCII_TAG: ::Regexp def self.normalize: (untyped original_value, ?remove_digit_start_char: bool) -> ::String + + def self.process_values_normalize: (untyped value) -> ::String end end end diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 29aa680363d..7eaed956ac4 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -6,27 +6,28 @@ describe '::serialized' do subject(:serialized) { described_class.serialized } - def with_process_env(program_name:, pwd: nil) - original_0 = $0 - original_pwd = Dir.pwd - $0 = program_name - allow(Dir).to receive(:pwd).and_return(pwd) if pwd - allow(File).to receive(:expand_path).and_call_original - allow(File).to receive(:expand_path).with('.').and_return('/app') - reset_serialized! - - yield - ensure - $0 = original_0 - allow(Dir).to receive(:pwd).and_return(original_pwd) if pwd - RSpec::Mocks.space.proxy_for(File).reset - reset_serialized! - end - def reset_serialized! 
described_class.remove_instance_variable(:@serialized) if described_class.instance_variable_defined?(:@serialized) end + shared_context 'with mocked process environment' do + let(:pwd) { '/app' } + + before do + @original_0 = $0 + $0 = program_name + allow(Dir).to receive(:pwd).and_return(pwd) + allow(File).to receive(:expand_path).and_call_original + allow(File).to receive(:expand_path).with('.').and_return('/app') + reset_serialized! + end + + after do + $0 = @original_0 + reset_serialized! + end + end + it { is_expected.to be_a_kind_of(String) } it 'returns the same object when called multiple times' do @@ -36,8 +37,11 @@ def reset_serialized! expect(first_call).to equal(second_call) end - it 'uses the basedir for /expectedbasedir/executable' do - with_process_env(program_name: '/expectedbasedir/executable', pwd: '/app') do + context 'with /expectedbasedir/executable' do + include_context 'with mocked process environment' + let(:program_name) { '/expectedbasedir/executable' } + + it 'uses the basedir correctly' do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:executable') expect(described_class.serialized).to include('entrypoint.basedir:expectedbasedir') @@ -45,8 +49,11 @@ def reset_serialized! end end - it 'uses the basedir for irb' do - with_process_env(program_name: 'irb', pwd: '/app') do + context 'with irb' do + include_context 'with mocked process environment' + let(:program_name) { 'irb' } + + it 'uses the basedir correctly' do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:irb') expect(described_class.serialized).to include('entrypoint.basedir:app') @@ -54,8 +61,11 @@ def reset_serialized! 
end end - it 'uses the basedir for my/path/rubyapp.rb' do - with_process_env(program_name: 'my/path/rubyapp.rb', pwd: '/app') do + context 'with my/path/rubyapp.rb' do + include_context 'with mocked process environment' + let(:program_name) { 'my/path/rubyapp.rb' } + + it 'extracts out serialized tags correctly' do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:rubyapp.rb') expect(described_class.serialized).to include('entrypoint.basedir:path') @@ -63,8 +73,23 @@ def reset_serialized! end end - it 'uses the basedir for bin/rails s' do - with_process_env(program_name: 'bin/rails', pwd: '/app') do + context 'with my/path/foo:,bar' do + include_context 'with mocked process environment' + let(:program_name) { 'my/path/foo:,bar' } + + it 'extracts out serialized tags correctly' do + expect(described_class.serialized).to include('entrypoint.workdir:app') + expect(described_class.serialized).to include('entrypoint.name:foo_bar') + expect(described_class.serialized).to include('entrypoint.basedir:path') + expect(described_class.serialized).to include('entrypoint.type:script') + end + end + + context 'with bin/rails' do + include_context 'with mocked process environment' + let(:program_name) { 'bin/rails' } + + it 'extracts out serialized tags correctly' do expect(described_class.serialized).to include('entrypoint.workdir:app') expect(described_class.serialized).to include('entrypoint.name:rails') expect(described_class.serialized).to include('entrypoint.basedir:bin') diff --git a/spec/datadog/core/tag_normalizer_spec.rb b/spec/datadog/core/tag_normalizer_spec.rb index d31538fab05..4fe5394d5dd 100644 --- a/spec/datadog/core/tag_normalizer_spec.rb +++ b/spec/datadog/core/tag_normalizer_spec.rb @@ -109,4 +109,56 @@ end end end + + describe 'Follows the additional normalization for process tag values' do + test_cases = [ + # Reusing the same Trace Agent inputs, but now for process values 
which cannot have colons and be over 100 characters + {in: '#test_starting_hash', out: 'test_starting_hash'}, + {in: 'TestCAPSandSuch', out: 'testcapsandsuch'}, + {in: 'Test Conversion Of Weird !@#$%^&**() Characters', out: 'test_conversion_of_weird_characters'}, + {in: '$#weird_starting', out: 'weird_starting'}, + {in: 'allowed:c0l0ns', out: 'allowed_c0l0ns'}, # colon not allowed for process values + {in: '1love', out: '1love'}, # differs when remove_digit_start_char is false + {in: 'ünicöde', out: 'ünicöde'}, + {in: 'ünicöde:metäl', out: 'ünicöde_metäl'}, # colon not allowed for process values + {in: 'Data🐨dog🐶 繋がっ⛰てて', out: 'data_dog_繋がっ_てて'}, + {in: ' spaces ', out: 'spaces'}, + {in: ' #hashtag!@#spaces #__<># ', out: 'hashtag_spaces'}, + {in: ':testing', out: '_testing'}, # colon not allowed for process values + {in: '_foo', out: 'foo'}, + {in: ':::test', out: '_test'}, # colon not allowed for process values + {in: 'contiguous_____underscores', out: 'contiguous_underscores'}, + {in: 'foo_', out: 'foo'}, + {in: '', out: ''}, + {in: ' ', out: ''}, + {in: 'ok', out: 'ok'}, + {in: 'AlsO:ök', out: 'also_ök'}, # colon not allowed for process values + {in: ':still_ok', out: '_still_ok'}, # colon not allowed for process values + {in: '___trim', out: 'trim'}, + {in: '12.:trim@', out: '12._trim'}, # colon not allowed for process values + {in: '12.:trim@@', out: '12._trim'}, # colon not allowed for process values + {in: 'fun:ky__tag/1', out: 'fun_ky_tag/1'}, # colon not allowed for process values + {in: 'fun:ky@tag/2', out: 'fun_ky_tag/2'}, # colon not allowed for process values + {in: 'fun:ky@@@tag/3', out: 'fun_ky_tag/3'}, # colon not allowed for process values + {in: 'tag:1/2.3', out: 'tag_1/2.3'}, # colon not allowed for process values + {in: '---fun:k####y_ta@#g/1_@@#', out: 'fun_k_y_ta_g/1'}, # colon not allowed for process values + {in: 'AlsO:œ#@ö))œk', out: 'also_œ_ö_œk'}, # colon not allowed for process values + {in: "test\x99\x8faaa", out: 'test_aaa'}, + {in: 
"test\x99\x8f", out: 'test'}, + {in: 'a' * 888, out: 'a' * 100}, # 100 characters max for process values + {in: ' regulartag ', out: 'regulartag'}, + {in: "\u017Fodd_\u017Fcase\u017F", out: "\u017Fodd_\u017Fcase\u017F"}, + {in: '™Ö™Ö™™Ö™', out: 'ö_ö_ö'}, + {in: "a�", out: 'a'}, + {in: "a��", out: 'a'}, + {in: "a��b", out: 'a_b'}, + {in: 'a' + ('🐶' * 799) + 'b', out: 'a'}, + ] + + test_cases.each do |test_case| + it "normalizes #{test_case[:in].inspect} to #{test_case[:out].inspect}" do + expect(described_class.process_values_normalize(test_case[:in])).to eq(test_case[:out]) + end + end + end end From e0bfe8c189b2f03a0d7d585e3a6dff251b677e6f Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 3 Dec 2025 08:32:27 -0500 Subject: [PATCH 75/78] use around syntax --- spec/datadog/core/environment/process_spec.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/spec/datadog/core/environment/process_spec.rb b/spec/datadog/core/environment/process_spec.rb index 7eaed956ac4..02489d42af1 100644 --- a/spec/datadog/core/environment/process_spec.rb +++ b/spec/datadog/core/environment/process_spec.rb @@ -13,9 +13,14 @@ def reset_serialized! shared_context 'with mocked process environment' do let(:pwd) { '/app' } - before do + around do |example| @original_0 = $0 $0 = program_name + example.run + $0 = @original_0 + end + + before do allow(Dir).to receive(:pwd).and_return(pwd) allow(File).to receive(:expand_path).and_call_original allow(File).to receive(:expand_path).with('.').and_return('/app') @@ -23,7 +28,6 @@ def reset_serialized! end after do - $0 = @original_0 reset_serialized! 
end end From 6c872a03507fb2cc0dd2a7605390abc2b54db455 Mon Sep 17 00:00:00 2001 From: Marco Costa Date: Wed, 3 Dec 2025 10:59:24 -0800 Subject: [PATCH 76/78] Update lib/datadog/core/tag_normalizer.rb Co-authored-by: Oleg Pudeyev <156273877+p-datadog@users.noreply.github.com> --- lib/datadog/core/tag_normalizer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index bad0b834830..39b9a0f3422 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -16,7 +16,7 @@ module TagNormalizer LEADING_INVALID_CHARS_NO_DIGITS = %r{\A[^\p{L}:]++} LEADING_INVALID_CHARS_WITH_DIGITS = %r{\A[^\p{L}0-9:./]++} MAX_BYTE_SIZE = 200 # Represents the general max tag length - MAX_PROCESS_VALUES_BYTE_SIZE = 100 # Represents the max tag length for process tags + MAX_PROCESS_VALUE_BYTE_SIZE = 100 # Represents the max tag length for process tags VALID_ASCII_TAG = %r{\A[a-z:][a-z0-9:./-]*\z} # Based on https://github.com/DataDog/datadog-agent/blob/45799c842bbd216bcda208737f9f11cade6fdd95/pkg/trace/traceutil/normalize.go#L131 From 01ac7b0cc3419d881cb1b1abcedf784613e3531c Mon Sep 17 00:00:00 2001 From: Marco Costa Date: Wed, 3 Dec 2025 11:00:36 -0800 Subject: [PATCH 77/78] Update lib/datadog/core/tag_normalizer.rb Co-authored-by: Sergey Fedorov --- lib/datadog/core/tag_normalizer.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index 39b9a0f3422..0c3d37d0fc0 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -70,7 +70,6 @@ def self.process_values_normalize(value) return value if value.empty? 
value.tr!(':', '_') - value.squeeze!('_') if value.include?('__') if value.bytesize > MAX_PROCESS_VALUES_BYTE_SIZE From 50d3392cef4ba21cf9da3f591da74b3f73c9a179 Mon Sep 17 00:00:00 2001 From: wan <26727996+wantsui@users.noreply.github.com> Date: Wed, 3 Dec 2025 21:55:18 -0500 Subject: [PATCH 78/78] Rename process_values_normalize to normalize_process_value and adjust tests for the singular version of MAX_PROCESS_VALUE_BYTE_SIZE --- lib/datadog/core/environment/process.rb | 6 +++--- lib/datadog/core/tag_normalizer.rb | 6 +++--- sig/datadog/core/tag_normalizer.rbs | 4 ++-- spec/datadog/core/tag_normalizer_spec.rb | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/datadog/core/environment/process.rb b/lib/datadog/core/environment/process.rb index 5ac07494534..a5a9664bbf3 100644 --- a/lib/datadog/core/environment/process.rb +++ b/lib/datadog/core/environment/process.rb @@ -16,13 +16,13 @@ def self.serialized return @serialized if defined?(@serialized) tags = [] - workdir = TagNormalizer.process_values_normalize(entrypoint_workdir.to_s) + workdir = TagNormalizer.normalize_process_value(entrypoint_workdir.to_s) tags << "#{Environment::Ext::TAG_ENTRYPOINT_WORKDIR}:#{workdir}" unless workdir.empty? - entry_name = TagNormalizer.process_values_normalize(entrypoint_name.to_s) + entry_name = TagNormalizer.normalize_process_value(entrypoint_name.to_s) tags << "#{Environment::Ext::TAG_ENTRYPOINT_NAME}:#{entry_name}" unless entry_name.empty? - basedir = TagNormalizer.process_values_normalize(entrypoint_basedir.to_s) + basedir = TagNormalizer.normalize_process_value(entrypoint_basedir.to_s) tags << "#{Environment::Ext::TAG_ENTRYPOINT_BASEDIR}:#{basedir}" unless basedir.empty? 
tags << "#{Environment::Ext::TAG_ENTRYPOINT_TYPE}:#{TagNormalizer.normalize(entrypoint_type, remove_digit_start_char: false)}" diff --git a/lib/datadog/core/tag_normalizer.rb b/lib/datadog/core/tag_normalizer.rb index 0c3d37d0fc0..30f407f8dae 100644 --- a/lib/datadog/core/tag_normalizer.rb +++ b/lib/datadog/core/tag_normalizer.rb @@ -65,15 +65,15 @@ def self.normalize(original_value, remove_digit_start_char: false) # - and must not contain colons # @param value [String] The original string # @return [String] The normalized string - def self.process_values_normalize(value) + def self.normalize_process_value(value) value = normalize(value) return value if value.empty? value.tr!(':', '_') value.squeeze!('_') if value.include?('__') - if value.bytesize > MAX_PROCESS_VALUES_BYTE_SIZE - value = value.byteslice(0, MAX_PROCESS_VALUES_BYTE_SIZE) || value + if value.bytesize > MAX_PROCESS_VALUE_BYTE_SIZE + value = value.byteslice(0, MAX_PROCESS_VALUE_BYTE_SIZE) || value value.scrub!("") end diff --git a/sig/datadog/core/tag_normalizer.rbs b/sig/datadog/core/tag_normalizer.rbs index 8863c9f398e..c6a92dccecf 100644 --- a/sig/datadog/core/tag_normalizer.rbs +++ b/sig/datadog/core/tag_normalizer.rbs @@ -5,12 +5,12 @@ module Datadog LEADING_INVALID_CHARS_NO_DIGITS: ::Regexp LEADING_INVALID_CHARS_WITH_DIGITS: ::Regexp MAX_BYTE_SIZE: ::Integer - MAX_PROCESS_VALUES_BYTE_SIZE: ::Integer + MAX_PROCESS_VALUE_BYTE_SIZE: ::Integer VALID_ASCII_TAG: ::Regexp def self.normalize: (untyped original_value, ?remove_digit_start_char: bool) -> ::String - def self.process_values_normalize: (untyped value) -> ::String + def self.normalize_process_value: (untyped value) -> ::String end end end diff --git a/spec/datadog/core/tag_normalizer_spec.rb b/spec/datadog/core/tag_normalizer_spec.rb index 4fe5394d5dd..efd050fb0ef 100644 --- a/spec/datadog/core/tag_normalizer_spec.rb +++ b/spec/datadog/core/tag_normalizer_spec.rb @@ -157,7 +157,7 @@ test_cases.each do |test_case| it "normalizes 
#{test_case[:in].inspect} to #{test_case[:out].inspect}" do - expect(described_class.process_values_normalize(test_case[:in])).to eq(test_case[:out]) + expect(described_class.normalize_process_value(test_case[:in])).to eq(test_case[:out]) end end end