1717package collector
1818
1919import (
20+ "errors"
2021 "fmt"
2122 "log/slog"
2223 "os"
@@ -26,15 +27,17 @@ import (
2627 "strconv"
2728 "sync"
2829
30+ "golang.org/x/exp/maps"
31+
2932 "github.com/alecthomas/kingpin/v2"
3033 "github.com/prometheus/client_golang/prometheus"
3134 "github.com/prometheus/procfs"
3235 "github.com/prometheus/procfs/sysfs"
33- "golang.org/x/exp/maps"
3436)
3537
3638type cpuCollector struct {
37- fs procfs.FS
39+ procfs procfs.FS
40+ sysfs sysfs.FS
3841 cpu * prometheus.Desc
3942 cpuInfo * prometheus.Desc
4043 cpuFrequencyHz * prometheus.Desc
@@ -45,6 +48,7 @@ type cpuCollector struct {
4548 cpuPackageThrottle * prometheus.Desc
4649 cpuIsolated * prometheus.Desc
4750 logger * slog.Logger
51+ cpuOnline * prometheus.Desc
4852 cpuStats map [int64 ]procfs.CPUStat
4953 cpuStatsMutex sync.Mutex
5054 isolatedCpus []uint16
@@ -70,17 +74,17 @@ func init() {
7074
7175// NewCPUCollector returns a new Collector exposing kernel/system statistics.
7276func NewCPUCollector (logger * slog.Logger ) (Collector , error ) {
73- fs , err := procfs .NewFS (* procPath )
77+ pfs , err := procfs .NewFS (* procPath )
7478 if err != nil {
7579 return nil , fmt .Errorf ("failed to open procfs: %w" , err )
7680 }
7781
78- sysfs , err := sysfs .NewFS (* sysPath )
82+ sfs , err := sysfs .NewFS (* sysPath )
7983 if err != nil {
8084 return nil , fmt .Errorf ("failed to open sysfs: %w" , err )
8185 }
8286
83- isolcpus , err := sysfs .IsolatedCPUs ()
87+ isolcpus , err := sfs .IsolatedCPUs ()
8488 if err != nil {
8589 if ! os .IsNotExist (err ) {
8690 return nil , fmt .Errorf ("Unable to get isolated cpus: %w" , err )
@@ -89,8 +93,9 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) {
8993 }
9094
9195 c := & cpuCollector {
92- fs : fs ,
93- cpu : nodeCPUSecondsDesc ,
96+ procfs : pfs ,
97+ sysfs : sfs ,
98+ cpu : nodeCPUSecondsDesc ,
9499 cpuInfo : prometheus .NewDesc (
95100 prometheus .BuildFQName (namespace , cpuCollectorSubsystem , "info" ),
96101 "CPU information from /proc/cpuinfo." ,
@@ -131,6 +136,11 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) {
131136 "Whether each core is isolated, information from /sys/devices/system/cpu/isolated." ,
132137 []string {"cpu" }, nil ,
133138 ),
139+ cpuOnline : prometheus .NewDesc (
140+ prometheus .BuildFQName (namespace , cpuCollectorSubsystem , "online" ),
141+ "CPUs that are online and being scheduled." ,
142+ []string {"cpu" }, nil ,
143+ ),
134144 logger : logger ,
135145 isolatedCpus : isolcpus ,
136146 cpuStats : make (map [int64 ]procfs.CPUStat ),
@@ -177,12 +187,21 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error {
177187 if c .isolatedCpus != nil {
178188 c .updateIsolated (ch )
179189 }
180- return c .updateThermalThrottle (ch )
190+ err := c .updateThermalThrottle (ch )
191+ if err != nil {
192+ return err
193+ }
194+ err = c .updateOnline (ch )
195+ if err != nil {
196+ return err
197+ }
198+
199+ return nil
181200}
182201
183202// updateInfo reads /proc/cpuinfo
184203func (c * cpuCollector ) updateInfo (ch chan <- prometheus.Metric ) error {
185- info , err := c .fs .CPUInfo ()
204+ info , err := c .procfs .CPUInfo ()
186205 if err != nil {
187206 return err
188207 }
@@ -333,9 +352,31 @@ func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) {
333352 }
334353}
335354
355+ // updateOnline reads /sys/devices/system/cpu/cpu*/online through sysfs and exports online status metrics.
356+ func (c * cpuCollector ) updateOnline (ch chan <- prometheus.Metric ) error {
357+ cpus , err := c .sysfs .CPUs ()
358+ if err != nil {
359+ return err
360+ }
361+ // No-op if the system does not support CPU online stats.
362+ cpu0 := cpus [0 ]
363+ if _ , err := cpu0 .Online (); err != nil && errors .Is (err , os .ErrNotExist ) {
364+ return nil
365+ }
366+ for _ , cpu := range cpus {
367+ setOnline := float64 (0 )
368+ if online , _ := cpu .Online (); online {
369+ setOnline = 1
370+ }
371+ ch <- prometheus .MustNewConstMetric (c .cpuOnline , prometheus .GaugeValue , setOnline , cpu .Number ())
372+ }
373+
374+ return nil
375+ }
376+
336377// updateStat reads /proc/stat through procfs and exports CPU-related metrics.
337378func (c * cpuCollector ) updateStat (ch chan <- prometheus.Metric ) error {
338- stats , err := c .fs .Stat ()
379+ stats , err := c .procfs .Stat ()
339380 if err != nil {
340381 return err
341382 }
0 commit comments