diff --git a/collector/src/bin/rustc-fake.rs b/collector/src/bin/rustc-fake.rs
index 387fafa41..1ec7affb2 100644
--- a/collector/src/bin/rustc-fake.rs
+++ b/collector/src/bin/rustc-fake.rs
@@ -119,6 +119,7 @@ fn main() {
let prof_out_dir = create_self_profile_dir();
if wrapper == "PerfStatSelfProfile" {
cmd.arg(format!("-Zself-profile={}", prof_out_dir.to_str().unwrap()));
+ cmd.arg("-Zself-profile-counter=instructions:u");
let _ = fs::remove_dir_all(&prof_out_dir);
let _ = fs::create_dir_all(&prof_out_dir);
}
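
For context, a minimal sketch (illustrative helper name, not the actual rustc-fake code, which builds the command incrementally inside main()) of how the two self-profile flags combine: `-Zself-profile` selects the output directory and `-Zself-profile-counter=instructions:u` switches the recorded intervals from wall-clock time to user-space instruction counts.

```rust
use std::path::Path;
use std::process::Command;

// Hypothetical helper illustrating the flag combination added above.
fn self_profile_cmd(rustc: &str, prof_out_dir: &Path) -> Command {
    let mut cmd = Command::new(rustc);
    // Directory where rustc writes its self-profile (.mm_profdata) output.
    cmd.arg(format!("-Zself-profile={}", prof_out_dir.display()));
    // Record user-space instruction counts instead of wall-clock intervals.
    cmd.arg("-Zself-profile-counter=instructions:u");
    cmd
}
```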
diff --git a/site/frontend/src/pages/detailed-query/page.vue b/site/frontend/src/pages/detailed-query/page.vue
index 904417a97..1f63140bd 100644
--- a/site/frontend/src/pages/detailed-query/page.vue
+++ b/site/frontend/src/pages/detailed-query/page.vue
@@ -391,8 +391,18 @@ loadData();
- 'Time (%)' is the percentage of the cpu-clock time spent on this query
- (we do not use wall-time as we want to account for parallelism).
+ 'Instructions (%)' is the percentage of instructions executed on this
+ query (we do not use wall-time as we want to account for parallelism).
+
+
+ Note: self-profile measurements have been
+ recently switched
+ from wall-time to HW counters (instruction count). If comparing with
+ an older artifact, the timings might not be directly comparable.
Executions do not include cached executions.
@@ -408,21 +418,21 @@ loadData();
-            Time (%)
+            Instructions (%)
-            Time (s)
+            Instructions
-            Time delta
+            Instructions delta
@@ -442,14 +452,14 @@ loadData();
@@ -442,14 +452,14 @@ loadData();
-            Incremental loading (s)
+            Incremental loading (icounts)
>,
}
+ // For backwards compatibility, self-profile event timing data is still represented as durations;
+ // however, since https://github.com/rust-lang/rustc-perf/pull/1647 it actually contains
+ // HW counter data (instruction counts).
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct QueryData {
pub label: QueryLabel,
- // Nanoseconds
+ // Instruction count
pub time: u64,
+ // Instruction count
pub self_time: u64,
pub percent_total_time: f32,
pub number_of_cache_misses: u32,
pub number_of_cache_hits: u32,
pub invocation_count: u32,
- // Nanoseconds
+ // Instruction count
pub blocked_time: u64,
- // Nanoseconds
+ // Instruction count
pub incremental_load_time: u64,
}
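
Because the wire format keeps `Duration`-shaped fields for backwards compatibility, a reader has to reinterpret them. A small sketch of that convention (the helper name is illustrative, not an existing rustc-perf function); it mirrors the `as_nanos() as u64` conversion used when filling these fields:

```rust
use std::time::Duration;

// Under the new convention, analyzeme still reports Durations, but each
// "nanosecond" encodes one executed instruction, so the raw nanosecond
// value is the instruction count.
fn instruction_count(d: Duration) -> u64 {
    d.as_nanos() as u64
}

// e.g. a self_time of Duration::from_nanos(12_345) means 12,345 executed
// instructions, not 12.345 microseconds.
```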
diff --git a/site/src/request_handlers/self_profile.rs b/site/src/request_handlers/self_profile.rs
index db1707451..7c16dcf62 100644
--- a/site/src/request_handlers/self_profile.rs
+++ b/site/src/request_handlers/self_profile.rs
@@ -460,7 +460,7 @@ pub async fn handle_self_profile(
.benchmark(selector::Selector::One(bench_name.to_string()))
.profile(selector::Selector::One(profile.parse().unwrap()))
.scenario(selector::Selector::One(scenario))
- .metric(selector::Selector::One(Metric::CpuClock));
+ .metric(selector::Selector::One(Metric::InstructionsUser));
// Helper for finding an `ArtifactId` based on a commit sha
let find_aid = |commit: &str| {
@@ -475,9 +475,9 @@ pub async fn handle_self_profile(
}
let commits = Arc::new(commits);
- let mut cpu_responses = ctxt.statistic_series(query, commits.clone()).await?;
- assert_eq!(cpu_responses.len(), 1, "all selectors are exact");
- let mut cpu_response = cpu_responses.remove(0).series;
+ let mut instructions_responses = ctxt.statistic_series(query, commits.clone()).await?;
+ assert_eq!(instructions_responses.len(), 1, "all selectors are exact");
+ let mut instructions_response = instructions_responses.remove(0).series;
let mut self_profile = get_or_download_self_profile(
ctxt,
@@ -485,7 +485,7 @@ pub async fn handle_self_profile(
bench_name,
profile,
scenario,
- cpu_response.next().unwrap().1,
+ instructions_response.next().unwrap().1,
)
.await?;
let base_self_profile = match commits.get(1) {
@@ -496,7 +496,7 @@ pub async fn handle_self_profile(
bench_name,
profile,
scenario,
- cpu_response.next().unwrap().1,
+ instructions_response.next().unwrap().1,
)
.await?,
),
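
The handler now requests the `InstructionsUser` statistic and feeds one value per commit into `get_or_download_self_profile`. A rough sketch of that consumption pattern, with simplified types (the exact series item shape is an assumption):

```rust
// Sketch only: the single exact-selector response yields one value per
// requested commit, in the same order as `commits`, so pulling from the
// iterator twice gives the new and base totals.
fn split_new_and_base<I>(mut series: I) -> (Option<f64>, Option<f64>)
where
    // ArtifactId simplified to String for illustration.
    I: Iterator<Item = (String, Option<f64>)>,
{
    let new = series.next().and_then(|(_, v)| v);
    let base = series.next().and_then(|(_, v)| v);
    (new, base)
}
```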
diff --git a/site/src/self_profile.rs b/site/src/self_profile.rs
index 1fee911e7..79e6fbe5b 100644
--- a/site/src/self_profile.rs
+++ b/site/src/self_profile.rs
@@ -318,7 +318,7 @@ pub(crate) async fn get_or_download_self_profile(
}
fn get_self_profile_data(
- cpu_clock: Option<f64>,
+ total_instructions: Option<f64>,
profile: &analyzeme::AnalysisResults,
) -> ServerResult {
let total_self_time: Duration = profile.query_data.iter().map(|qd| qd.self_time).sum();
@@ -345,7 +345,7 @@ fn get_self_profile_data(
time: profile.total_time.as_nanos() as u64,
self_time: total_self_time.as_nanos() as u64,
// TODO: check against wall-time from perf stats
- percent_total_time: cpu_clock
+ percent_total_time: total_instructions
.map(|w| ((total_self_time.as_secs_f64() / w) * 100.0) as f32)
// sentinel "we couldn't compute this time"
.unwrap_or(-100.0),
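
A standalone sketch of the percentage computation above (illustrative helper, not part of the codebase): when the total is unavailable, the code falls back to a negative sentinel so callers can distinguish "unknown" from a genuine 0%.

```rust
// Mirrors the Option::map / unwrap_or pattern used for percent_total_time:
// a missing total yields the -100.0 sentinel rather than a bogus percentage.
fn percent_of_total(part: f64, total: Option<f64>) -> f32 {
    total
        .map(|t| ((part / t) * 100.0) as f32)
        .unwrap_or(-100.0)
}
```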