From 68d2f872e5f265d39e5fecd54076e0e79631ab37 Mon Sep 17 00:00:00 2001 From: Jeff Muizelaar Date: Wed, 16 Jul 2025 13:42:59 -0400 Subject: [PATCH 1/2] Add stacks to unknown event markers --- etw-reader/examples/dump.rs | 23 ++++++++++++++++ samply/src/windows/etw_gecko.rs | 37 ++++++++++++++++++++++++- samply/src/windows/profile_context.rs | 39 +++++++++++++++++++++++---- 3 files changed, 93 insertions(+), 6 deletions(-) diff --git a/etw-reader/examples/dump.rs b/etw-reader/examples/dump.rs index cc843d56..85e57028 100644 --- a/etw-reader/examples/dump.rs +++ b/etw-reader/examples/dump.rs @@ -186,6 +186,29 @@ fn main() { metadata_start += metadata_size as usize; } } + Etw::EVENT_HEADER_EXT_TYPE_STACK_TRACE64 => { + // see ProcessExtendedData in TraceLog.cs from perfview + println!("extended: STACK_TRACE64"); + let data: &[u8] = unsafe { + std::slice::from_raw_parts( + i.DataPtr as *const u8, + i.DataSize as usize, + ) + }; + let match_id = u64::from_ne_bytes( + <[u8; 8]>::try_from(&data[0..8]).unwrap(), + ); + println!(" match_id: {}", match_id); + let mut address_start = 8; + while address_start < data.len() { + let address = u64::from_ne_bytes( + <[u8; 8]>::try_from(&data[address_start..address_start + 8]) + .unwrap(), + ); + println!(" address: {:x}", address); + address_start += 8; + } + } _ => { println!("extended: {:?}", i); } diff --git a/samply/src/windows/etw_gecko.rs b/samply/src/windows/etw_gecko.rs index be5c3271..a0e8af8d 100644 --- a/samply/src/windows/etw_gecko.rs +++ b/samply/src/windows/etw_gecko.rs @@ -5,6 +5,7 @@ use std::time::Instant; use debugid::DebugId; use fxprof_processed_profile::debugid; use uuid::Uuid; +use windows::Win32::System::Diagnostics::Etw; use super::coreclr::CoreClrContext; use super::etw_reader::parser::{Address, Parser, TryParse}; @@ -531,9 +532,43 @@ fn process_trace( return; } + let task_and_op = s.name().split_once('/').unwrap().1; let text = event_properties_to_string(&s, &mut parser, None); - context.handle_unknown_event(timestamp_raw, tid, task_and_op, text); + let marker = context.handle_unknown_event(timestamp_raw, tid, task_and_op, text); + + if e.ExtendedDataCount > 0 && marker.is_some() { + let items = unsafe { + std::slice::from_raw_parts(e.ExtendedData, e.ExtendedDataCount as usize) + }; + for i in items { + match i.ExtType as u32 { + Etw::EVENT_HEADER_EXT_TYPE_STACK_TRACE64 => { + // see ProcessExtendedData in TraceLog.cs from perfview + let data: &[u8] = unsafe { + std::slice::from_raw_parts( + i.DataPtr as *const u8, + i.DataSize as usize, + ) + }; + let _match_id = u64::from_ne_bytes( + <[u8; 8]>::try_from(&data[0..8]).unwrap(), + ); + let stack_address_iter = data[8..] + .chunks_exact(8) + .map(|a| u64::from_ne_bytes(a.try_into().unwrap())); + if let Some(marker) = marker { + let tid = e.EventHeader.ThreadId; + let pid = e.EventHeader.ProcessId; + + context.handle_marker_stack(timestamp_raw, pid, tid, stack_address_iter, marker); + + } + } + _ => { } + } + } + } } } }) diff --git a/samply/src/windows/profile_context.rs b/samply/src/windows/profile_context.rs index f72d30b2..94af46cd 100644 --- a/samply/src/windows/profile_context.rs +++ b/samply/src/windows/profile_context.rs @@ -1104,6 +1104,35 @@ impl ProfileContext { ); } + pub fn handle_marker_stack( + &mut self, + timestamp_raw: u64, + pid: u32, + tid: u32, + stack_address_iter: impl Iterator, + marker_handle: MarkerHandle, + ) { + let Some(process) = self.processes.get_by_pid_and_timestamp(pid, timestamp_raw) else { + return; + }; + + let Some(thread) = self.threads.get_by_tid(tid) else { + return; + }; + + let stack: Vec = to_stack_frames(stack_address_iter, self.address_classifier); + let stack_index = self.unresolved_stacks.convert(stack.into_iter().rev()); + let thread_handle = thread.handle; + let timestamp = self.timestamp_converter.convert_time(timestamp_raw); + process.unresolved_samples.attach_stack_to_marker( + thread_handle, + timestamp, + timestamp_raw, + stack_index, + marker_handle, + ); + } + pub fn handle_stack_arm64( &mut self, timestamp_raw: u64, @@ -1981,24 +2010,24 @@ impl ProfileContext { tid: u32, task_and_op: &str, stringified_properties: String, - ) { + ) -> Option { if !self.profile_creation_props.unknown_event_markers { - return; + return None; } let Some(thread_handle) = self.thread_handle_at_time(tid, timestamp_raw) else { - return; + return None; }; let timestamp = self.timestamp_converter.convert_time(timestamp_raw); let timing = MarkerTiming::Instant(timestamp); let marker_name = self.profile.handle_for_string(task_and_op); let description = self.profile.handle_for_string(&stringified_properties); - self.profile.add_marker( + Some(self.profile.add_marker( thread_handle, timing, FreeformMarker(marker_name, description), - ); + )) //println!("unhandled {}", s.name()) } From c9532c1ea4e343a20ec86b0526e75c9426c28bfa Mon Sep 17 00:00:00 2001 From: Jeff Muizelaar Date: Wed, 16 Jul 2025 16:26:27 -0400 Subject: [PATCH 2/2] Track GPU allocations --- samply/src/shared/process_sample_data.rs | 8 +++ samply/src/shared/unresolved_samples.rs | 27 +++++++++ samply/src/windows/etw_gecko.rs | 58 +++++++++++++++++++ .../src/windows/etw_reader/custom_schemas.rs | 2 +- samply/src/windows/profile_context.rs | 30 ++++++++++ 5 files changed, 124 insertions(+), 1 deletion(-) diff --git a/samply/src/shared/process_sample_data.rs b/samply/src/shared/process_sample_data.rs index 3f93a286..a2082ae7 100644 --- a/samply/src/shared/process_sample_data.rs +++ b/samply/src/shared/process_sample_data.rs @@ -9,6 +9,7 @@ use super::stack_depth_limiting_frame_iter::StackDepthLimitingFrameIter; use super::types::StackFrame; use super::unresolved_samples::{ SampleData, SampleOrMarker, UnresolvedSampleOrMarker, UnresolvedSamples, UnresolvedStacks, + AllocationSampleData }; #[derive(Debug, Clone)] @@ -112,6 +113,13 @@ impl ProcessSampleData { SampleOrMarker::MarkerHandle(mh) => { profile.set_marker_stack(thread_handle, mh, stack_handle); } + SampleOrMarker::AllocationSample(AllocationSampleData { + address, + size, + + }) => { + profile.add_allocation_sample(thread_handle, timestamp, stack_handle, address, size); + } } } diff --git a/samply/src/shared/unresolved_samples.rs b/samply/src/shared/unresolved_samples.rs index 0b0c6bd5..3440709a 100644 --- a/samply/src/shared/unresolved_samples.rs +++ b/samply/src/shared/unresolved_samples.rs @@ -113,6 +113,26 @@ impl UnresolvedSamples { } } + pub fn add_allocation_sample( + &mut self, + thread_handle: ThreadHandle, + timestamp: Timestamp, + timestamp_mono: u64, + stack: UnresolvedStackHandle, + address: u64, + size: i64, + extra_label_frame: Option, + ) { + self.samples_and_markers.push(UnresolvedSampleOrMarker { + thread_handle, + timestamp, + timestamp_mono, + stack, + extra_label_frame, + sample_or_marker: SampleOrMarker::AllocationSample(AllocationSampleData { address, size }), + }); + } + pub fn attach_stack_to_marker( &mut self, thread_handle: ThreadHandle, @@ -145,6 +165,7 @@ pub struct UnresolvedSampleOrMarker { #[derive(Debug, Clone)] pub enum SampleOrMarker { Sample(SampleData), + AllocationSample(AllocationSampleData), MarkerHandle(MarkerHandle), } @@ -154,6 +175,12 @@ pub struct SampleData { pub weight: i32, } +#[derive(Debug, Clone)] +pub struct AllocationSampleData { + pub address: u64, + pub size: i64, +} + #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] pub struct UnresolvedStackHandle(u32); diff --git a/samply/src/windows/etw_gecko.rs b/samply/src/windows/etw_gecko.rs index a0e8af8d..867cc6a1 100644 --- a/samply/src/windows/etw_gecko.rs +++ b/samply/src/windows/etw_gecko.rs @@ -523,6 +523,64 @@ fn process_trace( is_in_range, ); } + + "D3DUmdLogging/MapAllocation/Start" + | "D3DUmdLogging/MapAllocation/End" => { + //println!("MapAllocation"); + if !context.is_in_time_range(timestamp_raw) { + return; + } + let tid = e.EventHeader.ThreadId; + if !context.has_thread_at_time(tid, timestamp_raw) { + return; + } + + let size: u64 = parser.try_parse("Size").unwrap(); + let address: u64 = parser.try_parse("hDxgAllocation").unwrap(); + + + let task_and_op = s.name().split_once('/').unwrap().1; + let text = event_properties_to_string(&s, &mut parser, None); + let marker = context.handle_unknown_event(timestamp_raw, tid, task_and_op, text); + + if e.ExtendedDataCount > 0 && marker.is_some() { + let items = unsafe { + std::slice::from_raw_parts(e.ExtendedData, e.ExtendedDataCount as usize) + }; + for i in items { + match i.ExtType as u32 { + Etw::EVENT_HEADER_EXT_TYPE_STACK_TRACE64 => { + // see ProcessExtendedData in TraceLog.cs from perfview + let data: &[u8] = unsafe { + std::slice::from_raw_parts( + i.DataPtr as *const u8, + i.DataSize as usize, + ) + }; + let _match_id = u64::from_ne_bytes( + <[u8; 8]>::try_from(&data[0..8]).unwrap(), + ); + let stack_address_iter = data[8..] + .chunks_exact(8) + .map(|a| u64::from_ne_bytes(a.try_into().unwrap())); + if let Some(marker) = marker { + let tid = e.EventHeader.ThreadId; + let pid = e.EventHeader.ProcessId; + + context.handle_marker_stack(timestamp_raw, pid, tid, stack_address_iter.clone(), marker); + let size: i64 = match e.EventHeader.EventDescriptor.Opcode { + 1 => size.try_into().unwrap(), // start => allocate + 2 => -TryInto::::try_into(size).unwrap(), // end => deallocate + _ => panic!() + }; + context.handle_allocation_sample(timestamp_raw, pid, tid, size, address, stack_address_iter); + } + } + _ => { } + } + } + } + } _ => { if !context.is_in_time_range(timestamp_raw) { return; diff --git a/samply/src/windows/etw_reader/custom_schemas.rs b/samply/src/windows/etw_reader/custom_schemas.rs index 2c4191ef..05fb2382 100644 --- a/samply/src/windows/etw_reader/custom_schemas.rs +++ b/samply/src/windows/etw_reader/custom_schemas.rs @@ -698,7 +698,7 @@ const D3DUmdLogging_PROPS: [PropDesc; 6] = [ }, PropDesc { name: "Size", - in_type: TdhInType::InTypeUInt32, + in_type: TdhInType::InTypeUInt64, out_type: TdhOutType::OutTypeUInt64, }, // XXX: use an enum for these diff --git a/samply/src/windows/profile_context.rs b/samply/src/windows/profile_context.rs index 94af46cd..871b5ca0 100644 --- a/samply/src/windows/profile_context.rs +++ b/samply/src/windows/profile_context.rs @@ -924,6 +924,9 @@ impl ProfileContext { .insert((tid, timestamp_raw), thread_handle); } + + + pub fn handle_thread_start( &mut self, timestamp_raw: u64, @@ -1104,6 +1107,33 @@ impl ProfileContext { ); } + pub fn handle_allocation_sample( + &mut self, + timestamp_raw: u64, + pid: u32, + tid: u32, + size: i64, + address: u64, + stack_address_iter: impl Iterator, + ) { + let Some(process) = self.processes.get_by_pid_and_timestamp(pid, timestamp_raw) else { + return; + }; + + let Some(thread) = self.threads.get_by_tid(tid) else { + return; + }; + + let timestamp = self.timestamp_converter.convert_time(timestamp_raw); + let stack: Vec = to_stack_frames(stack_address_iter, self.address_classifier); + + let stack_index = self.unresolved_stacks.convert(stack.into_iter().rev()); + + + process.unresolved_samples.add_allocation_sample(process.main_thread_handle, timestamp, timestamp_raw, stack_index, address, size, None); + } + + pub fn handle_marker_stack( &mut self, timestamp_raw: u64,