-
Notifications
You must be signed in to change notification settings - Fork 2.2k
fix: cpu affinity #4815
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
fix: cpu affinity #4815
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -169,6 +169,7 @@ jobs: | |
|
||
- name: integration test (systemd driver) | ||
run: | | ||
sudo taskset -pc 0-1 1 | ||
# Delegate all cgroup v2 controllers to rootless user via --systemd-cgroup. | ||
# The default (since systemd v252) is "pids memory cpu". | ||
sudo mkdir -p /etc/systemd/system/[email protected] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -198,6 +198,17 @@ func (p *setnsProcess) setFinalCPUAffinity() error { | |
return nil | ||
} | ||
|
||
func (p *setnsProcess) hasExecCPUAffinity() bool { | ||
aff := p.config.CPUAffinity | ||
if aff == nil { | ||
return false | ||
} | ||
if aff.Initial != nil || aff.Final != nil { | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
func (p *setnsProcess) start() (retErr error) { | ||
defer p.comm.closeParent() | ||
|
||
|
@@ -258,6 +269,13 @@ func (p *setnsProcess) start() (retErr error) { | |
if err := p.setFinalCPUAffinity(); err != nil { | ||
return err | ||
} | ||
|
||
if !p.hasExecCPUAffinity() { | ||
if err := resetAffinityMask(p.pid()); err != nil { | ||
return err | ||
} | ||
} | ||
|
||
if p.intelRdtPath != "" { | ||
// if Intel RDT "resource control" filesystem path exists | ||
_, err := os.Stat(p.intelRdtPath) | ||
|
@@ -615,6 +633,11 @@ func (p *initProcess) start() (retErr error) { | |
return fmt.Errorf("unable to apply cgroup configuration: %w", err) | ||
} | ||
} | ||
|
||
if err := resetAffinityMask(p.pid()); err != nil { | ||
return err | ||
} | ||
|
||
if p.intelRdtManager != nil { | ||
if err := p.intelRdtManager.Apply(p.pid()); err != nil { | ||
return fmt.Errorf("unable to apply Intel RDT configuration: %w", err) | ||
|
@@ -981,3 +1004,24 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) { | |
} | ||
return i, nil | ||
} | ||
|
||
// Set all inherited cpu affinity. Old kernels do that automatically, but | ||
// new kernels remember the affinity that was set before the cgroup move. | ||
// This is undesirable, because it inherits the systemd affinity when the container | ||
// should really move to the container space cpus. | ||
// here we can't use runtime.NumCPU() to get cpu counts because it call sched_getaffinity to get cpu counts. | ||
// If systemd set CPUAffinity then use runtime.NumCPU() can't get real cpu counts. | ||
func resetAffinityMask(pid int) error { | ||
cpus, err := utils.SystemCPUCores() | ||
if err != nil { | ||
return err | ||
} | ||
cpuset := unix.CPUSet{} | ||
for i := 0; i < int(cpus); i++ { | ||
cpuset.Set(i) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was this tested with nested containers? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed. I think the code in this PR will only work if you use lxcfs with nested containers (which nobody does with runc). I suspect that we would instead need to parse There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alternatively, would just passing I believe this is trying to take advantage of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh actually, However, I would like to know if In fact, it might be even simpler to just generate a set of 8192 CPUs and get the kernel to clamp it for us? The kernel automatically clamps the size of EDIT: Testing this, it seems |
||
if err := unix.SchedSetaffinity(pid, &cpuset); err != nil { | ||
return fmt.Errorf("error resetting pid %d affinity: %w", pid, err) | ||
} | ||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
package utils | ||
|
||
import ( | ||
"bufio" | ||
"encoding/json" | ||
"fmt" | ||
"io" | ||
"os" | ||
"path/filepath" | ||
|
@@ -113,3 +115,32 @@ func Annotations(labels []string) (bundle string, userAnnotations map[string]str | |
} | ||
return | ||
} | ||
|
||
// SystemCPUCores parses CPU usage information from a reader providing | ||
// /proc/stat format data. It returns the number of CPUs. | ||
func SystemCPUCores() (cpuNum uint32, _ error) { | ||
f, err := os.Open("/proc/stat") | ||
if err != nil { | ||
return 0, err | ||
} | ||
defer f.Close() | ||
return readSystemCPU(f) | ||
} | ||
|
||
func readSystemCPU(r io.Reader) (cpuNum uint32, _ error) { | ||
ningmingxiao marked this conversation as resolved.
Show resolved
Hide resolved
|
||
reader := bufio.NewReader(r) | ||
for { | ||
line, err := reader.ReadString('\n') | ||
if err != nil { | ||
return 0, fmt.Errorf("error scanning /proc/stat file: %w", err) | ||
} | ||
// just count the line start with cpuN(N is cpu No) | ||
if line[:3] != "cpu" { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the comment that all cpu* lines are at the beginning belongs here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why did you mark this as solved? Am I missing something? |
||
break | ||
} | ||
if '0' <= line[3] && line[3] <= '9' { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here we need a commet explaining that there is a "cpu" line that we should ignore and only count cpu lines followed by a number. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. idem |
||
cpuNum++ | ||
} | ||
} | ||
return cpuNum, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be moved to the bats script