diff --git a/collector/src/bin/rustc-fake.rs b/collector/src/bin/rustc-fake.rs
index 387fafa41..1ec7affb2 100644
--- a/collector/src/bin/rustc-fake.rs
+++ b/collector/src/bin/rustc-fake.rs
@@ -119,6 +119,7 @@ fn main() {
     let prof_out_dir = create_self_profile_dir();
     if wrapper == "PerfStatSelfProfile" {
         cmd.arg(format!("-Zself-profile={}", prof_out_dir.to_str().unwrap()));
+        cmd.arg("-Zself-profile-counter=instructions:u");
         let _ = fs::remove_dir_all(&prof_out_dir);
         let _ = fs::create_dir_all(&prof_out_dir);
     }
diff --git a/site/frontend/src/pages/detailed-query/page.vue b/site/frontend/src/pages/detailed-query/page.vue
index 904417a97..1f63140bd 100644
--- a/site/frontend/src/pages/detailed-query/page.vue
+++ b/site/frontend/src/pages/detailed-query/page.vue
@@ -391,8 +391,18 @@ loadData();

-        'Time (%)' is the percentage of the cpu-clock time spent on this query
-        (we do not use wall-time as we want to account for parallelism).
+        'Instructions (%)' is the percentage of instructions executed on this
+        query (we do not use wall-time as we want to account for parallelism).
+
+
+        Note: self-profile measurements have been
+        recently switched
+        from wall-time to HW counters (instruction count). If comparing with
+        an older artifact, the timings might not be directly comparable.
         Executions do not include cached executions.

@@ -408,21 +418,21 @@ loadData();
-          Time (%)
+          Instructions (%)
-          Time (s)
+          Instructions
-          Time delta
+          Instructions delta
@@ -442,14 +452,14 @@ loadData();
-          Incremental loading (s)
+          Incremental loading (icounts)
 >,
 }
+// Due to backwards compatibility, self profile event timing data is represented as durations,
+// however since https://github.com/rust-lang/rustc-perf/pull/1647 it actually represents
+// HW counter data (instruction counts).
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct QueryData {
     pub label: QueryLabel,
-    // Nanoseconds
+    // Instruction count
     pub time: u64,
+    // Instruction count
     pub self_time: u64,
     pub percent_total_time: f32,
     pub number_of_cache_misses: u32,
     pub number_of_cache_hits: u32,
     pub invocation_count: u32,
-    // Nanoseconds
+    // Instruction count
     pub blocked_time: u64,
-    // Nanoseconds
+    // Instruction count
     pub incremental_load_time: u64,
 }
diff --git a/site/src/request_handlers/self_profile.rs b/site/src/request_handlers/self_profile.rs
index db1707451..7c16dcf62 100644
--- a/site/src/request_handlers/self_profile.rs
+++ b/site/src/request_handlers/self_profile.rs
@@ -460,7 +460,7 @@ pub async fn handle_self_profile(
         .benchmark(selector::Selector::One(bench_name.to_string()))
         .profile(selector::Selector::One(profile.parse().unwrap()))
         .scenario(selector::Selector::One(scenario))
-        .metric(selector::Selector::One(Metric::CpuClock));
+        .metric(selector::Selector::One(Metric::InstructionsUser));

     // Helper for finding an `ArtifactId` based on a commit sha
     let find_aid = |commit: &str| {
@@ -475,9 +475,9 @@
     }
     let commits = Arc::new(commits);

-    let mut cpu_responses = ctxt.statistic_series(query, commits.clone()).await?;
-    assert_eq!(cpu_responses.len(), 1, "all selectors are exact");
-    let mut cpu_response = cpu_responses.remove(0).series;
+    let mut instructions_responses = ctxt.statistic_series(query, commits.clone()).await?;
+    assert_eq!(instructions_responses.len(), 1, "all selectors are exact");
+    let mut instructions_response = instructions_responses.remove(0).series;

     let mut self_profile = get_or_download_self_profile(
         ctxt,
@@ -485,7 +485,7 @@
         bench_name,
         profile,
         scenario,
-        cpu_response.next().unwrap().1,
+        instructions_response.next().unwrap().1,
     )
     .await?;
     let base_self_profile = match commits.get(1) {
@@ -496,7 +496,7 @@
             bench_name,
             profile,
             scenario,
-            cpu_response.next().unwrap().1,
+            instructions_response.next().unwrap().1,
         )
         .await?,
     ),
diff --git a/site/src/self_profile.rs b/site/src/self_profile.rs
index 1fee911e7..79e6fbe5b 100644
--- a/site/src/self_profile.rs
+++ b/site/src/self_profile.rs
@@ -318,7 +318,7 @@ pub(crate) async fn get_or_download_self_profile(
 }

 fn get_self_profile_data(
-    cpu_clock: Option<f64>,
+    total_instructions: Option<f64>,
     profile: &analyzeme::AnalysisResults,
 ) -> ServerResult {
     let total_self_time: Duration = profile.query_data.iter().map(|qd| qd.self_time).sum();
@@ -345,7 +345,7 @@ fn get_self_profile_data(
         time: profile.total_time.as_nanos() as u64,
         self_time: total_self_time.as_nanos() as u64,
         // TODO: check against wall-time from perf stats
-        percent_total_time: cpu_clock
+        percent_total_time: total_instructions
            .map(|w| ((total_self_time.as_secs_f64() / w) * 100.0) as f32)
            // sentinel "we couldn't compute this time"
            .unwrap_or(-100.0),
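
The collector-side change above is where the switch actually happens at measurement time: when the PerfStatSelfProfile wrapper runs, rustc is asked to record the user-space hardware instruction counter for self-profile events instead of wall-time. A minimal sketch of that idea, using a hypothetical profiled_rustc helper that is not part of this patch:

    use std::path::Path;
    use std::process::Command;

    // Hypothetical helper (illustration only): build a rustc invocation that
    // writes self-profile data into `prof_out_dir` and records the user-space
    // instruction counter per event, mirroring the flag added in rustc-fake.rs.
    fn profiled_rustc(rustc: &Path, prof_out_dir: &Path, args: &[String]) -> Command {
        let mut cmd = Command::new(rustc);
        cmd.args(args);
        cmd.arg(format!("-Zself-profile={}", prof_out_dir.display()));
        cmd.arg("-Zself-profile-counter=instructions:u");
        cmd
    }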
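On the site side, the QueryData fields keep their old u64 "duration" representation for backwards compatibility, but after this change each unit is one executed instruction rather than one nanosecond. A minimal sketch of reinterpreting the analyzeme data accordingly (total_self_instructions is an illustrative name, not part of the patch; it assumes the same analyzeme::AnalysisResults type used in get_self_profile_data above):

    use std::time::Duration;

    // Sketch: the self-profile data still surfaces as `Duration`s, but with
    // `-Zself-profile-counter=instructions:u` each "nanosecond" is really one
    // executed instruction, so summing per-query self time yields a total
    // instruction count.
    fn total_self_instructions(profile: &analyzeme::AnalysisResults) -> u64 {
        let total: Duration = profile.query_data.iter().map(|qd| qd.self_time).sum();
        total.as_nanos() as u64
    }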