Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/moby/sys/user v0.4.0
github.com/moby/sys/userns v0.1.0
github.com/mrunalp/fileutils v0.5.1
github.com/opencontainers/cgroups v0.0.4
github.com/opencontainers/cgroups v0.0.5
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0
github.com/opencontainers/selinux v1.12.0
github.com/seccomp/libseccomp-golang v0.11.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q=
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/cgroups v0.0.4 h1:XVj8P/IHVms/j+7eh8ggdkTLAxjz84ZzuFyGoE28DR4=
github.com/opencontainers/cgroups v0.0.4/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs=
github.com/opencontainers/cgroups v0.0.5 h1:DRITAqcOnY0uSBzIpt1RYWLjh5DPDiqUs4fY6Y0ktls=
github.com/opencontainers/cgroups v0.0.5/go.mod h1:oWVzJsKK0gG9SCRBfTpnn16WcGEqDI8PAcpMGbqWxcs=
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0 h1:RLn0YfUWkiqPGtgUANvJrcjIkCHGRl3jcz/c557M28M=
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=
Expand Down
31 changes: 0 additions & 31 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"io"
"os"
"os/exec"
"path"
"path/filepath"
"reflect"
"strconv"
Expand Down Expand Up @@ -655,40 +654,10 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm
bootstrapData: data,
container: c,
},
cgroupPaths: state.CgroupPaths,
rootlessCgroups: c.config.RootlessCgroups,
intelRdtPath: state.IntelRdtPath,
initProcessPid: state.InitProcessPid,
}
if len(p.SubCgroupPaths) > 0 {
if add, ok := p.SubCgroupPaths[""]; ok {
// cgroup v1: using the same path for all controllers.
// cgroup v2: the only possible way.
for k := range proc.cgroupPaths {
subPath := path.Join(proc.cgroupPaths[k], add)
if !strings.HasPrefix(subPath, proc.cgroupPaths[k]) {
return nil, fmt.Errorf("%s is not a sub cgroup path", add)
}
proc.cgroupPaths[k] = subPath
}
// cgroup v2: do not try to join init process's cgroup
// as a fallback (see (*setnsProcess).start).
proc.initProcessPid = 0
} else {
// Per-controller paths.
for ctrl, add := range p.SubCgroupPaths {
if val, ok := proc.cgroupPaths[ctrl]; ok {
subPath := path.Join(val, add)
if !strings.HasPrefix(subPath, val) {
return nil, fmt.Errorf("%s is not a sub cgroup path", add)
}
proc.cgroupPaths[ctrl] = subPath
} else {
return nil, fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
}
}
}
}
return proc, nil
}

Expand Down
4 changes: 4 additions & 0 deletions libcontainer/container_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ func (m *mockCgroupManager) Apply(pid int) error {
return nil
}

// AddPid is a no-op test stub: both arguments are ignored and nil is always
// returned, so callers exercising the mock never see a cgroup join failure.
func (m *mockCgroupManager) AddPid(_ string, _ int) error {
return nil
}

// Set is a no-op test stub: the supplied resources are ignored and nil is
// always returned.
func (m *mockCgroupManager) Set(_ *cgroups.Resources) error {
return nil
}
Expand Down
98 changes: 75 additions & 23 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ import (
"errors"
"fmt"
"io"
"maps"
"net"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -153,7 +156,6 @@ func (p *containerProcess) wait() (*os.ProcessState, error) { //nolint:unparam

type setnsProcess struct {
containerProcess
cgroupPaths map[string]string
rootlessCgroups bool
intelRdtPath string
initProcessPid int
Expand Down Expand Up @@ -244,6 +246,76 @@ func (p *setnsProcess) setFinalCPUAffinity() error {
return nil
}

// addIntoCgroupV1 moves the setns process into the container's cgroups,
// honoring any sub-cgroup requested via p.process.SubCgroupPaths.
//
// When SubCgroupPaths is empty, or contains the "" key (one sub-cgroup for
// every controller), the work is delegated to the cgroup manager's AddPid.
// Otherwise the keys are treated as controller names: each named controller's
// path is extended with its sub-path, and the pid is written directly to
// every controller path (adjusted or not).
//
// Errors from joining a cgroup are ignored when p.rootlessCgroups is set.
// An unknown controller, or a sub-path that resolves outside the controller's
// base cgroup, is always reported as an error.
func (p *setnsProcess) addIntoCgroupV1() error {
	if sub, ok := p.process.SubCgroupPaths[""]; ok || len(p.process.SubCgroupPaths) == 0 {
		// Either same sub-cgroup for all paths, or no sub-cgroup.
		err := p.manager.AddPid(sub, p.pid())
		if err != nil && !p.rootlessCgroups {
			return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
		}
		return nil
	}

	// Per-controller sub-cgroup paths. Not supported by AddPid (or systemd),
	// so we have to calculate and check all sub-cgroup paths, and write
	// directly to cgroupfs.
	//
	// Work on a copy so the manager's own path map is never modified.
	paths := maps.Clone(p.manager.GetPaths())
	for ctrl, sub := range p.process.SubCgroupPaths {
		base, ok := paths[ctrl]
		if !ok {
			return fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
		}
		cgPath := path.Join(base, sub)
		// A bare string-prefix test is not a containment test: base ".../foo"
		// is a prefix of the sibling ".../foobar", so sub "../foobar" would be
		// accepted. Require either the base itself or a path-separator
		// boundary right after it.
		if cgPath != base && !strings.HasPrefix(cgPath, strings.TrimSuffix(base, "/")+"/") {
			return fmt.Errorf("bad sub cgroup path: %s", sub)
		}
		paths[ctrl] = cgPath
	}

	// The loop variable is named dir so that it does not shadow the imported
	// path package.
	for _, dir := range paths {
		if err := cgroups.WriteCgroupProc(dir, p.pid()); err != nil && !p.rootlessCgroups {
			return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
		}
	}

	return nil
}

func (p *setnsProcess) addIntoCgroupV2() error {
sub := p.process.SubCgroupPaths[""]
err := p.manager.AddPid(sub, p.pid())
if err != nil && !p.rootlessCgroups {
// On cgroup v2 + nesting + domain controllers, adding to initial cgroup may fail with EBUSY.
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
// Try to join the cgroup of InitProcessPid, unless sub-cgroup is explicitly set.
if p.initProcessPid != 0 && sub == "" {
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
if initCgErr == nil {
if initCgPath, ok := initCg[""]; ok {
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
logrus.Debugf("adding pid %d to cgroup failed (%v), attempting to join %s",
p.pid(), err, initCgDirpath)
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
}
}
Comment on lines +288 to +302
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am guessing you don't want to remove this, despite your comment in #2416 (comment)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, I would like this to be removed, but not in this PR, as that would break the test case added in PR #2416 (and might also break some users' unusual workloads). I would like to hear from @AkihiroSuda first.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's keep it compatible with the existing releases of runc.
I wish we could simplify the implementation though.

}
if err != nil {
return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
}
}

return nil
}

// addIntoCgroup places the setns process into the container's cgroup,
// dispatching to the v1 or v2 implementation depending on whether the host
// runs in cgroup v2 unified mode.
func (p *setnsProcess) addIntoCgroup() error {
	if !cgroups.IsCgroup2UnifiedMode() {
		return p.addIntoCgroupV1()
	}
	return p.addIntoCgroupV2()
}

func (p *setnsProcess) start() (retErr error) {
defer p.comm.closeParent()

Expand Down Expand Up @@ -277,28 +349,8 @@ func (p *setnsProcess) start() (retErr error) {
if err := p.execSetns(); err != nil {
return fmt.Errorf("error executing setns process: %w", err)
}
for _, path := range p.cgroupPaths {
if err := cgroups.WriteCgroupProc(path, p.pid()); err != nil && !p.rootlessCgroups {
// On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
// Try to join the cgroup of InitProcessPid.
if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 {
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
if initCgErr == nil {
if initCgPath, ok := initCg[""]; ok {
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
logrus.Debugf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)",
p.pid(), p.cgroupPaths, err, initCg, initCgDirpath)
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
}
}
}
if err != nil {
return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
}
}
if err := p.addIntoCgroup(); err != nil {
return err
}
// Set final CPU affinity right after the process is moved into container's cgroup.
if err := p.setFinalCPUAffinity(); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion script/setup_rootless.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ ssh-keygen -t ecdsa -N "" -f "$HOME/.ssh/rootless.key"
sudo mkdir -p -m 0700 /home/rootless/.ssh
sudo cp "$HOME/.ssh/rootless.key" /home/rootless/.ssh/id_ecdsa
sudo cp "$HOME/.ssh/rootless.key.pub" /home/rootless/.ssh/authorized_keys
sudo chown -R rootless.rootless /home/rootless
sudo chown -R rootless:rootless /home/rootless
12 changes: 6 additions & 6 deletions tests/integration/exec.bats
Original file line number Diff line number Diff line change
Expand Up @@ -226,17 +226,17 @@ function check_exec_debug() {
# Check we can't join parent cgroup.
runc exec --cgroup ".." test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ]
[[ "$output" == *" .. is not a sub cgroup path"* ]]
[[ "$output" == *"bad sub cgroup path"* ]]

# Check we can't join non-existing subcgroup.
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
[[ "$output" == *" adding pid "*"o such file or directory"* ]]

# Check we can't join non-existing subcgroup (for a particular controller).
runc exec --cgroup cpu:nonexistent test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
[[ "$output" == *" adding pid "*"o such file or directory"* ]]

# Check we can't specify non-existent controller.
runc exec --cgroup whaaat:/ test_busybox true
Expand Down Expand Up @@ -277,12 +277,12 @@ function check_exec_debug() {
# Check we can't join parent cgroup.
runc exec --cgroup ".." test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ]
[[ "$output" == *" .. is not a sub cgroup path"* ]]
[[ "$output" == *"bad sub cgroup path"* ]]

# Check we can't join non-existing subcgroup.
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
[ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
[[ "$output" == *" adding pid "*"o such file or directory"* ]]

# Check we can join top-level cgroup (implicit).
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
Expand Down Expand Up @@ -318,7 +318,7 @@ function check_exec_debug() {
# Check that --cgroup / disables the init cgroup fallback.
runc exec --cgroup / test_busybox true
[ "$status" -ne 0 ]
[[ "$output" == *" adding pid "*" to cgroups"*"/cgroup.procs: device or resource busy"* ]]
[[ "$output" == *" adding pid "*" to cgroups"*"evice or resource busy"* ]]

# Check that explicit --cgroup foobar works.
runc exec --cgroup foobar test_busybox grep '^0::/foobar$' /proc/self/cgroup
Expand Down
5 changes: 5 additions & 0 deletions vendor/github.com/opencontainers/cgroups/cgroups.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions vendor/github.com/opencontainers/cgroups/fs/fs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions vendor/github.com/opencontainers/cgroups/fs2/fs2.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions vendor/github.com/opencontainers/cgroups/systemd/common.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions vendor/github.com/opencontainers/cgroups/systemd/v1.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading