From 302d6383b0b5fd416c7a2b8f691329017f2ed669 Mon Sep 17 00:00:00 2001 From: Debarshi Ray Date: Fri, 11 Oct 2024 17:25:51 +0200 Subject: [PATCH] Don't bump RLIMIT_NOFILE in exec sessions with '--ulimit host' Starting from commit 9126b45a3f2255b1 ("Up default Podman rlimits to avoid max open files"), Podman started bumping its soft limit for the maximum number of open file descriptors (RLIMIT_NOFILE or ulimit -n) to permit exposing a large number of ports to a container. This was later fine-tuned in commit a2c1a2df54f3660c ("podman: bump RLIMIT_NOFILE also without CAP_SYS_RESOURCE"). Unfortunately, this also increases the limits for 'podman exec' sessions running in containers created with: $ podman create --network host --ulimit host ... This is what Toolbx uses to provide a containerized interactive command line environment for software development and troubleshooting the host operating system. It confuses developers and system administrators debugging a process that's leaking file descriptors and crashing on the host OS. The crashes either don't reproduce inside the container or they take a lot longer to reproduce, both of which are frustrating. Therefore, it will be good to retain the limits, at least for this specific scenario. It turns out that since this code was written, the Go runtime has had two interesting changes. Starting from Go 1.19 [1], the Go runtime bumps the soft limit for RLIMIT_NOFILE for all Go programs [2]. This means that there's no longer any need for Podman to bump it's own limits, because it switched from requiring Go 1.18 to 1.20 in commit 4dd58f226d937b83 ("Move golang requirement from 1.18 to 1.20"). It's probably good to still log the detected limits, in case Go's behaviour changes. Not everybody was happy with this [3], because the higher limits got propagated to child processes spawned by Go programs. Among other things, this can break old programs using select(2) [4]. So, Go's behaviour was fine-tuned to restore the original soft limit for RLIMIT_NOFILE when forking a child process [5]. With these two changes in Go, which Podman already uses, if the bumping of RLIMIT_NOFILE is left to the Go runtime, then the limits are no longer increased for 'podman exec' sessions. Otherwise, if Podman continues to bump the soft limit for RLIMIT_NOFILE on its own, then it prevents the Go runtime from restoring the original limits when forking, and leads to the higher limits in 'podman exec' sessions. The existing 'podman run --ulimit host ... ulimit -Hn' test in test/e2e/run_test.go was extended to also check the soft limit. The similar test for 'podman exec' was moved from test/e2e/toolbox_test.go to test/e2e/exec_test.go for consistency and because there's nothing Toolbx specific about it. The test was similarly extended, and updated to be more idiomatic. Due to the behaviour of the Go runtime noted above, and since the tests are written in Go, the current or soft limit for RLIMIT_NOFILE returned by syscall.Getrlimit() is the same as the hard limit. The Alpine Linux image doesn't have a standalone binary for 'ulimit' and it's picky about the order in which the options are listed. The -H or -S must come first, followed by a space, and then the -n. [1] https://go.dev/doc/go1.19#runtime [2] Go commit 8427429c592588af ("os: raise open file rlimit at startup") https://github.com/golang/go/commit/8427429c592588af https://github.com/golang/go/issues/46279 [3] https://github.com/containerd/containerd/issues/8249 [4] http://0pointer.net/blog/file-descriptor-limits.html [5] Go commit f5eef58e4381259c ("syscall: restore original NOFILE ...") https://github.com/golang/go/commit/f5eef58e4381259c https://github.com/golang/go/issues/46279 Fixes: https://github.com/containers/podman/issues/17681 Signed-off-by: Debarshi Ray --- cmd/podman/early_init_darwin.go | 23 +++++++------------- cmd/podman/early_init_linux.go | 29 ++++++++------------------ test/e2e/exec_test.go | 35 +++++++++++++++++++++++++++++++ test/e2e/run_test.go | 10 +++++++++ test/e2e/toolbox_test.go | 37 --------------------------------- 5 files changed, 62 insertions(+), 72 deletions(-) diff --git a/cmd/podman/early_init_darwin.go b/cmd/podman/early_init_darwin.go index b21c23e182..0a0583e84a 100644 --- a/cmd/podman/early_init_darwin.go +++ b/cmd/podman/early_init_darwin.go @@ -1,28 +1,21 @@ package main import ( - "fmt" - "os" "syscall" + + "github.com/sirupsen/logrus" ) -func setRLimitsNoFile() error { +func checkRLimits() { var rLimitNoFile syscall.Rlimit if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimitNoFile); err != nil { - return fmt.Errorf("getting RLIMITS_NOFILE: %w", err) - } - err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &syscall.Rlimit{ - Max: rLimitNoFile.Max, - Cur: rLimitNoFile.Max, - }) - if err != nil { - return fmt.Errorf("setting new RLIMITS_NOFILE: %w", err) + logrus.Debugf("Error getting RLIMITS_NOFILE: %s", err) + return } - return nil + + logrus.Debugf("Got RLIMITS_NOFILE: cur=%d, max=%d", rLimitNoFile.Cur, rLimitNoFile.Max) } func earlyInitHook() { - if err := setRLimitsNoFile(); err != nil { - fmt.Fprintf(os.Stderr, "Failed to set RLIMITS_NOFILE: %s\n", err.Error()) - } + checkRLimits() } diff --git a/cmd/podman/early_init_linux.go b/cmd/podman/early_init_linux.go index 1298fad401..3888f56083 100644 --- a/cmd/podman/early_init_linux.go +++ b/cmd/podman/early_init_linux.go @@ -1,27 +1,19 @@ package main import ( - "fmt" - "os" "syscall" - "github.com/containers/podman/v5/libpod/define" + "github.com/sirupsen/logrus" ) -func setRLimits() error { - rlimits := new(syscall.Rlimit) - rlimits.Cur = define.RLimitDefaultValue - rlimits.Max = define.RLimitDefaultValue - if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, rlimits); err != nil { - if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, rlimits); err != nil { - return fmt.Errorf("getting rlimits: %w", err) - } - rlimits.Cur = rlimits.Max - if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, rlimits); err != nil { - return fmt.Errorf("setting new rlimits: %w", err) - } +func checkRLimits() { + var rLimitNoFile syscall.Rlimit + if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimitNoFile); err != nil { + logrus.Debugf("Error getting RLIMITS_NOFILE: %s", err) + return } - return nil + + logrus.Debugf("Got RLIMITS_NOFILE: cur=%d, max=%d", rLimitNoFile.Cur, rLimitNoFile.Max) } func setUMask() { @@ -30,9 +22,6 @@ func setUMask() { } func earlyInitHook() { - if err := setRLimits(); err != nil { - fmt.Fprintf(os.Stderr, "Failed to set rlimits: %s\n", err.Error()) - } - + checkRLimits() setUMask() } diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go index 2b5b411d5b..8163a8c9a9 100644 --- a/test/e2e/exec_test.go +++ b/test/e2e/exec_test.go @@ -6,7 +6,9 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" + "syscall" . "github.com/containers/podman/v5/test/utils" . "github.com/onsi/ginkgo/v2" @@ -309,6 +311,39 @@ var _ = Describe("Podman exec", func() { Expect(session.OutputToString()).To(ContainSubstring("0000000000000000")) }) + It("podman exec limits host test", func() { + SkipIfRemote("This can only be used for local tests") + + var l syscall.Rlimit + + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &l) + Expect(err).ToNot(HaveOccurred()) + + setup := podmanTest.RunTopContainerWithArgs("test1", []string{"--ulimit", "host"}) + setup.WaitWithDefaultTimeout() + Expect(setup).Should(ExitCleanly()) + + session := podmanTest.Podman([]string{"exec", "test1", "sh", "-c", "ulimit -H -n"}) + session.WaitWithDefaultTimeout() + Expect(session).Should(ExitCleanly()) + + ulimitCtrStr := strings.TrimSpace(session.OutputToString()) + ulimitCtr, err := strconv.ParseUint(ulimitCtrStr, 10, 0) + Expect(err).ToNot(HaveOccurred()) + + Expect(ulimitCtr).Should(BeNumerically("==", l.Max)) + + session = podmanTest.Podman([]string{"exec", "test1", "sh", "-c", "ulimit -S -n"}) + session.WaitWithDefaultTimeout() + Expect(session).Should(ExitCleanly()) + + ulimitCtrStr = strings.TrimSpace(session.OutputToString()) + ulimitCtr, err = strconv.ParseUint(ulimitCtrStr, 10, 0) + Expect(err).ToNot(HaveOccurred()) + + Expect(ulimitCtr).Should(BeNumerically("<", l.Max)) + }) + // #10927 ("no logs from conmon"), one of our nastiest flakes It("podman exec terminal doesn't hang", FlakeAttempts(3), func() { setup := podmanTest.Podman([]string{"run", "-dti", "--name", "test1", fedoraMinimal, "sleep", "+Inf"}) diff --git a/test/e2e/run_test.go b/test/e2e/run_test.go index b956a8a9cd..f22523f393 100644 --- a/test/e2e/run_test.go +++ b/test/e2e/run_test.go @@ -762,6 +762,16 @@ USER bin`, BB) Expect(err).ToNot(HaveOccurred()) Expect(ulimitCtr).Should(BeNumerically(">=", l.Max)) + + session = podmanTest.Podman([]string{"run", "--rm", "--ulimit", "host", fedoraMinimal, "ulimit", "-Sn"}) + session.WaitWithDefaultTimeout() + Expect(session).Should(ExitCleanly()) + + ulimitCtrStr = strings.TrimSpace(session.OutputToString()) + ulimitCtr, err = strconv.ParseUint(ulimitCtrStr, 10, 0) + Expect(err).ToNot(HaveOccurred()) + + Expect(ulimitCtr).Should(BeNumerically("<", l.Max)) }) It("podman run with cidfile", func() { diff --git a/test/e2e/toolbox_test.go b/test/e2e/toolbox_test.go index b24650b56c..d3a184e207 100644 --- a/test/e2e/toolbox_test.go +++ b/test/e2e/toolbox_test.go @@ -34,7 +34,6 @@ import ( "path" "strconv" "strings" - "syscall" "github.com/containers/podman/v5/libpod/define" . "github.com/containers/podman/v5/test/utils" @@ -60,42 +59,6 @@ var _ = Describe("Toolbox-specific testing", func() { Expect(session.OutputToString()).To(ContainSubstring("0:123")) }) - It("podman create --ulimit host + podman exec - correctly mirrors hosts ulimits", func() { - if podmanTest.RemoteTest { - Skip("Ulimit check does not work with a remote client") - } - info := GetHostDistributionInfo() - if info.Distribution == "debian" { - // "expected 1048576 to be >= 1073741816" - Skip("FIXME 2024-05-28 fails on debian, maybe because of systemd 256?") - } - var session *PodmanSessionIntegration - var containerHardLimit int - var rlimit syscall.Rlimit - var err error - - err = syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rlimit) - Expect(err).ToNot(HaveOccurred()) - GinkgoWriter.Printf("Expected value: %d", rlimit.Max) - - session = podmanTest.Podman([]string{"create", "--name", "test", "--ulimit", "host", ALPINE, - "sleep", "1000"}) - session.WaitWithDefaultTimeout() - Expect(session).Should(ExitCleanly()) - - session = podmanTest.Podman([]string{"start", "test"}) - session.WaitWithDefaultTimeout() - Expect(session).Should(ExitCleanly()) - - session = podmanTest.Podman([]string{"exec", "test", "sh", "-c", - "ulimit -H -n"}) - session.WaitWithDefaultTimeout() - Expect(session).Should(ExitCleanly()) - containerHardLimit, err = strconv.Atoi(strings.Trim(session.OutputToString(), "\n")) - Expect(err).ToNot(HaveOccurred()) - Expect(containerHardLimit).To(BeNumerically(">=", rlimit.Max)) - }) - It("podman create --ipc=host --pid=host + podman exec - correct shared memory limit size", func() { // Comparison of the size of /dev/shm on the host being equal to the one in // a container