@@ -88,6 +88,7 @@ use std::fmt::Display;
8888use std:: intrinsics:: unlikely;
8989use std:: path:: Path ;
9090use std:: sync:: Arc ;
91+ use std:: sync:: atomic:: { AtomicU64 , Ordering } ;
9192use std:: time:: { Duration , Instant } ;
9293use std:: { fs, process} ;
9394
@@ -117,6 +118,7 @@ bitflags::bitflags! {
117118
118119 const DEFAULT = Self :: GENERIC_ACTIVITIES . bits( ) |
119120 Self :: QUERY_PROVIDERS . bits( ) |
121+ Self :: QUERY_CACHE_HITS . bits( ) |
120122 Self :: QUERY_BLOCKED . bits( ) |
121123 Self :: INCR_CACHE_LOADS . bits( ) |
122124 Self :: INCR_RESULT_HASHING . bits( ) |
@@ -145,6 +147,7 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
145147] ;
146148
/// Something that uniquely identifies a query invocation.
//
// A plain `u32` newtype: deriving `Clone`/`Copy` lets it be passed by value
// freely, and `Debug` makes it usable in diagnostics; `PartialEq`/`Eq`/`Hash`
// are required because it is used as a hash-map key for cache-hit counting.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct QueryInvocationId(pub u32);
149152
150153/// Which format to use for `-Z time-passes`
@@ -411,10 +414,7 @@ impl SelfProfilerRef {
411414 #[ inline( never) ]
412415 #[ cold]
413416 fn cold_call ( profiler_ref : & SelfProfilerRef , query_invocation_id : QueryInvocationId ) {
414- profiler_ref. instant_query_event (
415- |profiler| profiler. query_cache_hit_event_kind ,
416- query_invocation_id,
417- ) ;
417+ profiler_ref. profiler . as_ref ( ) . unwrap ( ) . increment_query_cache_hit ( query_invocation_id) ;
418418 }
419419
420420 if unlikely ( self . event_filter_mask . contains ( EventFilter :: QUERY_CACHE_HITS ) ) {
@@ -459,22 +459,6 @@ impl SelfProfilerRef {
459459 } )
460460 }
461461
462- #[ inline( always) ]
463- fn instant_query_event (
464- & self ,
465- event_kind : fn ( & SelfProfiler ) -> StringId ,
466- query_invocation_id : QueryInvocationId ,
467- ) {
468- let event_id = StringId :: new_virtual ( query_invocation_id. 0 ) ;
469- let thread_id = get_thread_id ( ) ;
470- let profiler = self . profiler . as_ref ( ) . unwrap ( ) ;
471- profiler. profiler . record_instant_event (
472- event_kind ( profiler) ,
473- EventId :: from_virtual ( event_id) ,
474- thread_id,
475- ) ;
476- }
477-
478462 pub fn with_profiler ( & self , f : impl FnOnce ( & SelfProfiler ) ) {
479463 if let Some ( profiler) = & self . profiler {
480464 f ( profiler)
@@ -489,6 +473,30 @@ impl SelfProfilerRef {
489473 self . profiler . as_ref ( ) . map ( |p| p. get_or_alloc_cached_string ( s) )
490474 }
491475
476+ /// Store query cache hits to the self-profile log.
477+ /// Should be called once at the end of the compilation session.
478+ ///
479+ /// The cache hits are stored per **query invocation**, not **per query kind/type**.
480+ /// `analyzeme` can later deduplicate individual query labels from the QueryInvocationId event
481+ /// IDs.
482+ pub fn store_query_cache_hits ( & self ) {
483+ if self . event_filter_mask . contains ( EventFilter :: QUERY_CACHE_HITS ) {
484+ let profiler = self . profiler . as_ref ( ) . unwrap ( ) ;
485+ let query_hits = profiler. query_hits . read ( ) ;
486+ let builder = EventIdBuilder :: new ( & profiler. profiler ) ;
487+ let thread_id = get_thread_id ( ) ;
488+ for ( query_invocation, hit_count) in query_hits. iter ( ) {
489+ let event_id = builder. from_label ( StringId :: new_virtual ( query_invocation. 0 ) ) ;
490+ profiler. profiler . record_integer_event (
491+ profiler. query_cache_hit_count_event_kind ,
492+ event_id,
493+ thread_id,
494+ hit_count. load ( Ordering :: Relaxed ) ,
495+ ) ;
496+ }
497+ }
498+ }
499+
492500 #[ inline]
493501 pub fn enabled ( & self ) -> bool {
494502 self . profiler . is_some ( )
@@ -537,13 +545,24 @@ pub struct SelfProfiler {
537545
538546 string_cache : RwLock < FxHashMap < String , StringId > > ,
539547
548+ /// Recording individual query cache hits as "instant" measureme events
549+ /// is incredibly expensive. Instead of doing that, we simply aggregate
550+ /// cache hit *counts* per query invocation, and then store the final count
551+ /// of cache hits per invocation at the end of the compilation session.
552+ ///
553+ /// With this approach, we don't know the individual thread IDs and timestamps
554+ /// of cache hits, but it has very little overhead on top of `-Zself-profile`.
555+ /// Recording the cache hits as individual events made compilation 3-5x slower.
556+ query_hits : RwLock < FxHashMap < QueryInvocationId , AtomicU64 > > ,
557+
540558 query_event_kind : StringId ,
541559 generic_activity_event_kind : StringId ,
542560 incremental_load_result_event_kind : StringId ,
543561 incremental_result_hashing_event_kind : StringId ,
544562 query_blocked_event_kind : StringId ,
545- query_cache_hit_event_kind : StringId ,
546563 artifact_size_event_kind : StringId ,
564+ /// Total cache hits per query invocation
565+ query_cache_hit_count_event_kind : StringId ,
547566}
548567
549568impl SelfProfiler {
@@ -571,8 +590,8 @@ impl SelfProfiler {
571590 let incremental_result_hashing_event_kind =
572591 profiler. alloc_string ( "IncrementalResultHashing" ) ;
573592 let query_blocked_event_kind = profiler. alloc_string ( "QueryBlocked" ) ;
574- let query_cache_hit_event_kind = profiler. alloc_string ( "QueryCacheHit" ) ;
575593 let artifact_size_event_kind = profiler. alloc_string ( "ArtifactSize" ) ;
594+ let query_cache_hit_count_event_kind = profiler. alloc_string ( "QueryCacheHitCount" ) ;
576595
577596 let mut event_filter_mask = EventFilter :: empty ( ) ;
578597
@@ -616,8 +635,9 @@ impl SelfProfiler {
616635 incremental_load_result_event_kind,
617636 incremental_result_hashing_event_kind,
618637 query_blocked_event_kind,
619- query_cache_hit_event_kind,
620638 artifact_size_event_kind,
639+ query_cache_hit_count_event_kind,
640+ query_hits : Default :: default ( ) ,
621641 } )
622642 }
623643
@@ -627,6 +647,21 @@ impl SelfProfiler {
627647 self . profiler . alloc_string ( s)
628648 }
629649
650+ /// Store a cache hit of a query invocation
651+ pub fn increment_query_cache_hit ( & self , id : QueryInvocationId ) {
652+ // Fast path: assume that the query was already encountered before, and just record
653+ // a cache hit
654+ let mut guard = self . query_hits . upgradable_read ( ) ;
655+ if let Some ( counter) = guard. get ( & id) {
656+ // We only want the count, no other synchronization is required
657+ counter. fetch_add ( 1 , Ordering :: Relaxed ) ;
658+ return ;
659+ }
660+ guard. with_upgraded ( |map| {
661+ map. insert ( id, AtomicU64 :: from ( 1 ) ) ;
662+ } ) ;
663+ }
664+
630665 /// Gets a `StringId` for the given string. This method makes sure that
631666 /// any strings going through it will only be allocated once in the
632667 /// profiling data.
0 commit comments