From 493a840345f869119c19f336239555c9228e1967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Fri, 19 Sep 2025 15:46:39 +0200 Subject: [PATCH 1/3] NO-JIRA: tests(buildinputs) improve project root detection, also `Dockerfile.*` detection (#2522) --- scripts/buildinputs/buildinputs_test.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/buildinputs/buildinputs_test.go b/scripts/buildinputs/buildinputs_test.go index 31e81bef90..621257b6e7 100644 --- a/scripts/buildinputs/buildinputs_test.go +++ b/scripts/buildinputs/buildinputs_test.go @@ -4,13 +4,15 @@ import ( "encoding/json" "os" "path/filepath" + "runtime" + "strings" "testing" ) func globDockerfiles(dir string) ([]string, error) { files := make([]string, 0) err := filepath.Walk(dir, func(path string, f os.FileInfo, err error) error { - if filepath.Base(path) == "Dockerfile" { + if strings.HasPrefix(filepath.Base(path), "Dockerfile.") { files = append(files, path) } return nil @@ -21,8 +23,14 @@ func globDockerfiles(dir string) ([]string, error) { // TestParseAllDockerfiles checks there are no panics when processing all Dockerfiles we have func TestParseAllDockerfiles(t *testing.T) { - projectRoot := "../../" + _, currentFilePath, _, ok := runtime.Caller(0) + if !ok { + t.Fatal("failed to get caller information") + } + + projectRoot := filepath.Join(filepath.Dir(currentFilePath), "../../") dockerfiles := noErr2(globDockerfiles(projectRoot)) + t.Logf("found %d Dockerfiles in %s", len(dockerfiles), projectRoot) if len(dockerfiles) < 6 { t.Fatalf("not enough Dockerfiles found, got %+v", dockerfiles) From 82c55fab37d745557b18909c5060b09729f12d52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Fri, 19 Sep 2025 15:47:23 +0200 Subject: [PATCH 2/3] NO-JIRA: tests(buildinputs) implement a `heredoc.Doc()` helper to dedent backtick-strings (#2522) --- scripts/buildinputs/buildinputs_test.go | 1 + scripts/buildinputs/go.mod | 1 + scripts/buildinputs/heredoc.go | 62 +++++++++++++++++++++++++ scripts/buildinputs/heredoc_test.go | 23 +++++++++ 4 files changed, 87 insertions(+) create mode 100644 scripts/buildinputs/heredoc.go create mode 100644 scripts/buildinputs/heredoc_test.go diff --git a/scripts/buildinputs/buildinputs_test.go b/scripts/buildinputs/buildinputs_test.go index 621257b6e7..c7a3bdf66b 100644 --- a/scripts/buildinputs/buildinputs_test.go +++ b/scripts/buildinputs/buildinputs_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "os" "path/filepath" + "reflect" "runtime" "strings" "testing" diff --git a/scripts/buildinputs/go.mod b/scripts/buildinputs/go.mod index 8317badab6..0d50fe425b 100644 --- a/scripts/buildinputs/go.mod +++ b/scripts/buildinputs/go.mod @@ -4,6 +4,7 @@ go 1.24 require ( github.com/containerd/platforms v1.0.0-rc.1 + github.com/google/go-cmp v0.7.0 github.com/moby/buildkit v0.23.2 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.1 diff --git a/scripts/buildinputs/heredoc.go b/scripts/buildinputs/heredoc.go new file mode 100644 index 0000000000..c47f06dff0 --- /dev/null +++ b/scripts/buildinputs/heredoc.go @@ -0,0 +1,62 @@ +// llm-powered reimplementation of github.com/MakeNowJust/heredoc +package main + +import ( + "math" + "strings" +) + +// Doc removes common leading whitespace from every line in a string. +func Doc(s string) string { + lines := strings.Split(s, "\n") + minIndent := math.MaxInt32 + + // First, find the minimum indentation of non-empty lines. + for _, line := range lines { + if len(strings.TrimSpace(line)) == 0 { + continue // Skip empty or whitespace-only lines + } + + indent := 0 + for _, r := range line { + if r == ' ' || r == '\t' { + indent++ + } else { + break + } + } + + if indent < minIndent { + minIndent = indent + } + } + + // If no common indentation is found, return the original string. + if minIndent == math.MaxInt32 { + return s + } + + // Create a builder to efficiently construct the new string. + var builder strings.Builder + for i, line := range lines { + if i == 0 && line == "" { + continue // Skip the first line if it's empty. + } + if len(strings.TrimSpace(line)) == 0 { + if i != len(lines)-1 { + // Unless this is the last line, in which case we drop trailing whitespace. + builder.WriteString(line) // Keep empty lines as they are. + } + } else { + // Trim the minimum common indentation from the start of the line. + builder.WriteString(line[minIndent:]) + } + + // Add the newline back, except for the very last line. + if i < len(lines)-1 { + builder.WriteString("\n") + } + } + + return builder.String() +} diff --git a/scripts/buildinputs/heredoc_test.go b/scripts/buildinputs/heredoc_test.go new file mode 100644 index 0000000000..271bf428c5 --- /dev/null +++ b/scripts/buildinputs/heredoc_test.go @@ -0,0 +1,23 @@ +package main + +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestDoc(t *testing.T) { + input := ` + a + b + ` + diff(t, "a\nb\n", Doc(input)) +} + +// diff errors with a diff between expected and actual if they are not equal. +func diff(t *testing.T, expected, actual string) { + t.Helper() + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("mismatch (-want +got):\n%s", diff) + } +} From d9d8cb70982be29f44f235c731fa0c626681fb2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Fri, 19 Sep 2025 17:58:44 +0200 Subject: [PATCH 3/3] Issue #2521: fix(scripts/buildinputs): fix build input detection to deal with mounts (#2522) ``` + /Users/jdanek/IdeaProjects/notebooks/scripts/sandbox.py --dockerfile codeserver/ubi9-python-3.12/Dockerfile.cpu --platform linux/amd64 -- podman build --no-cache --platform=linux/amd64 --label release=2025a --tag quay.io/opendatahub/workbench-images:codeserver-ubi9-python-3.12-2025a_20250919 --file codeserver/ubi9-python-3.12/Dockerfile.cpu --build-arg BASE_IMAGE=registry.access.redhat.com/ubi9/python-312:latest '{};' __file__='/Users/jdanek/IdeaProjects/notebooks/scripts/sandbox.py' started with args=Namespace(dockerfile=PosixPath('codeserver/ubi9-python-3.12/Dockerfile.cpu'), platform='linux/amd64', remaining=['--', 'podman', 'build', '--no-cache', '--platform=linux/amd64', '--label', 'release=2025a', '--tag', 'quay.io/opendatahub/workbench-images:codeserver-ubi9-python-3.12-2025a_20250919', '--file', 'codeserver/ubi9-python-3.12/Dockerfile.cpu', '--build-arg', 'BASE_IMAGE=registry.access.redhat.com/ubi9/python-312:latest', '{};']) prereqs=[PosixPath('codeserver/ubi9-python-3.12/test'), PosixPath('foo'), PosixPath('tmp/test_log.txt')] Traceback (most recent call last): File "/Users/jdanek/IdeaProjects/notebooks/scripts/sandbox.py", line 91, in sys.exit(main()) ^^^^^^ File "/Users/jdanek/IdeaProjects/notebooks/scripts/sandbox.py", line 49, in main setup_sandbox(prereqs, pathlib.Path(tmpdir)) File "/Users/jdanek/IdeaProjects/notebooks/scripts/sandbox.py", line 87, in setup_sandbox shutil.copy(dep, tmpdir / dep.parent) File "/opt/homebrew/Cellar/python@3.12/3.12.11/Frameworks/Python.framework/Versions/3.12/lib/python3.12/shutil.py", line 435, in copy copyfile(src, dst, follow_symlinks=follow_symlinks) File "/opt/homebrew/Cellar/python@3.12/3.12.11/Frameworks/Python.framework/Versions/3.12/lib/python3.12/shutil.py", line 260, in copyfile with open(src, 'rb') as fsrc: ^^^^^^^^^^^^^^^ FileNotFoundError: [Errno 2] No such file or directory: 'foo' gmake: *** [Makefile:177: codeserver-ubi9-python-3.12] Error 1 ``` --- scripts/buildinputs/buildinputs.go | 8 ++- scripts/buildinputs/buildinputs_test.go | 68 +++++++++++++++++++++--- scripts/buildinputs/dockerfile.go | 69 +++++++++++++++++-------- scripts/buildinputs/go.mod | 10 ++-- scripts/buildinputs/go.sum | 6 +++ scripts/sandbox.py | 2 +- 6 files changed, 130 insertions(+), 33 deletions(-) diff --git a/scripts/buildinputs/buildinputs.go b/scripts/buildinputs/buildinputs.go index 0303316392..7c915113b6 100644 --- a/scripts/buildinputs/buildinputs.go +++ b/scripts/buildinputs/buildinputs.go @@ -1,6 +1,7 @@ package main import ( + "encoding/json" "flag" "fmt" "os" @@ -26,6 +27,11 @@ func main() { flag.Parse() for _, dockerfile := range flag.Args() { deps := getDockerfileDeps(dockerfile, targetArch) - fmt.Println(deps) + // nil slice encodes to null, which is not what we want + if deps == nil { + deps = []string{} + } + encoder := json.NewEncoder(os.Stdout) + noErr(encoder.Encode(deps)) } } diff --git a/scripts/buildinputs/buildinputs_test.go b/scripts/buildinputs/buildinputs_test.go index c7a3bdf66b..338136b8c5 100644 --- a/scripts/buildinputs/buildinputs_test.go +++ b/scripts/buildinputs/buildinputs_test.go @@ -1,11 +1,11 @@ package main import ( - "encoding/json" "os" "path/filepath" "reflect" "runtime" + "slices" "strings" "testing" ) @@ -40,14 +40,15 @@ func TestParseAllDockerfiles(t *testing.T) { for _, dockerfile := range dockerfiles { t.Run(dockerfile, func(t *testing.T) { result := getDockerfileDeps(dockerfile, "amd64") - if result == "" { + if len(result) == 0 { // no deps in the dockerfile return } - data := make([]string, 0) - noErr(json.Unmarshal([]byte(result), &data)) - for _, path := range data { - stat := noErr2(os.Stat(filepath.Join(projectRoot, path))) + for _, path := range result { + stat, err := os.Stat(filepath.Join(projectRoot, path)) + if err != nil { + t.Fatal(err) + } if stat.IsDir() { // log this very interesting observation t.Logf("dockerfile copies in a whole directory: %s", path) @@ -56,3 +57,58 @@ func TestParseAllDockerfiles(t *testing.T) { }) } } + +// TestParseDockerfileWithBindMount checks for a bug where a Dockerfile with RUN --mount=type=bind,src=foo,dst=bar would report it has no inputs +func TestParseDockerfileWithBindMount(t *testing.T) { + dockerfile := filepath.Join(t.TempDir(), "Dockerfile") + // language=Dockerfile + noErr(os.WriteFile(dockerfile, []byte(Doc(` + FROM codeserver AS tests + ARG CODESERVER_SOURCE_CODE=codeserver/ubi9-python-3.12 + COPY ${CODESERVER_SOURCE_CODE}/test /tmp/test + RUN --mount=type=tmpfs,target=/opt/app-root/src --mount=type=bind,src=foo,dst=bar <<'EOF' + set -Eeuxo pipefail + python3 /tmp/test/test_startup.py |& tee /tmp/test_log.txt + EOF + `)), 0644)) + + //dockerfile = "/Users/jdanek/IdeaProjects/notebooks/jupyter/rocm/pytorch/ubi9-python-3.12/Dockerfile.rocm" + + result := getDockerfileDeps(dockerfile, "amd64") + expected := []string{"codeserver/ubi9-python-3.12/test", "foo"} + if !reflect.DeepEqual( + slices.Sorted(slices.Values(result)), + slices.Sorted(slices.Values(expected)), + ) { + t.Errorf("expected %v but got %v", expected, result) + } +} + +func TestParseFileWithStageCopy(t *testing.T) { + dockerfile := filepath.Join(t.TempDir(), "Dockerfile") + // language=Dockerfile + noErr(os.WriteFile(dockerfile, []byte(Doc(` + FROM codeserver + COPY --from=registry.access.redhat.com/ubi9/ubi /etc/yum.repos.d/ubi.repo /etc/yum.repos.d/ubi.repo + `)), 0644)) + + result := getDockerfileDeps(dockerfile, "amd64") + if len(result) != 0 { + t.Fatalf("unexpected deps reported for the dockerfile: %s", result) + } +} + +func TestParseFileWithStageMount(t *testing.T) { + dockerfile := filepath.Join(t.TempDir(), "Dockerfile") + // language=Dockerfile + noErr(os.WriteFile(dockerfile, []byte(Doc(` + FROM javabuilder + RUN --mount=type=bind,from=build,source=/.m2_repository,target=/.m2_repository \ + mvn package + `)), 0644)) + + result := getDockerfileDeps(dockerfile, "amd64") + if len(result) != 0 { + t.Fatalf("unexpected deps reported for the dockerfile: %s", result) + } +} diff --git a/scripts/buildinputs/dockerfile.go b/scripts/buildinputs/dockerfile.go index bcdc76a520..015c79b09e 100644 --- a/scripts/buildinputs/dockerfile.go +++ b/scripts/buildinputs/dockerfile.go @@ -3,9 +3,9 @@ package main import ( "context" "encoding/json" - "fmt" "log" "os" + "path/filepath" "strings" "github.com/containerd/platforms" @@ -20,7 +20,7 @@ import ( "github.com/pkg/errors" ) -func getDockerfileDeps(dockerfile string, targetArch string) string { +func getDockerfileDeps(dockerfile string, targetArch string) []string { ctx := context.Background() data := noErr2(os.ReadFile(dockerfile)) @@ -47,42 +47,69 @@ func getDockerfileDeps(dockerfile string, targetArch string) string { return getOpSourceFollowPaths(definition) } -func getOpSourceFollowPaths(definition *llb.Definition) string { +func getOpSourceFollowPaths(definition *llb.Definition) []string { // https://earthly.dev/blog/compiling-containers-dockerfiles-llvm-and-buildkit/ // https://stackoverflow.com/questions/73067660/what-exactly-is-the-frontend-and-backend-of-docker-buildkit - ops := make([]llbOp, 0) + opsByDigest := make(map[digest.Digest]llbOp, len(definition.Def)) for _, dt := range definition.Def { var op pb.Op if err := op.UnmarshalVT(dt); err != nil { panic("failed to parse op") } dgst := digest.FromBytes(dt) - ent := llbOp{Op: &op, Digest: dgst, OpMetadata: definition.Metadata[dgst].ToPB()} - ops = append(ops, ent) + ent := llbOp{ + Op: &op, + Digest: dgst, + OpMetadata: definition.Metadata[dgst].ToPB(), + } + opsByDigest[dgst] = ent } - var result string = "" - for _, op := range ops { - switch op := op.Op.Op.(type) { - case *pb.Op_Source: - if strings.HasPrefix(op.Source.Identifier, "docker-image://") { - // no-op - } else if strings.HasPrefix(op.Source.Identifier, "local://") { - paths := op.Source.Attrs[pb.AttrFollowPaths] - // TODO treat result as a set of strings to get unique set across all layers - // this code "works" as is because it seems the terminal layer is the last one processed - which - // contains all the referenced files - but treating this as a set seems safer (?) - result = paths - log.Printf("found paths: %s", paths) - } else { - panic(fmt.Errorf("unexpected prefix %v", op.Source.Identifier)) + var result []string + for _, opDef := range opsByDigest { + switch top := opDef.Op.Op.(type) { + // https://github.com/moby/buildkit/blob/v0.24/solver/pb/ops.proto#L308-L325 + case *pb.Op_File: + for _, a := range top.File.Actions { + // NOTE CAREFULLY: FileActionCopy copies files from secondaryInput on top of input + if cpy := a.GetCopy(); cpy != nil { + if inputIsFromLocalContext(a.SecondaryInput, opDef.Op.Inputs, opsByDigest) { + result = append(result, cleanPath(cpy.Src)) + } + } + } + case *pb.Op_Exec: + for _, m := range top.Exec.Mounts { + if inputIsFromLocalContext(m.Input, opDef.Op.Inputs, opsByDigest) { + result = append(result, cleanPath(m.Selector)) + } } } } + return result } +func cleanPath(path string) string { + return noErr2(filepath.Rel("/", filepath.Clean(path))) +} + +func inputIsFromLocalContext(input int64, inputs []*pb.Input, opsByDigest map[digest.Digest]llbOp) bool { + // input is -1 if the input is a FROM scratch or equivalent + if input == -1 { + return false + } + + srcDigest := digest.Digest(inputs[input].Digest) + sourceOp := opsByDigest[srcDigest] + if src, ok := sourceOp.Op.Op.(*pb.Op_Source); ok { + // local://context is the primary context, but there may be multiple named contexts + return strings.HasPrefix(src.Source.Identifier, "local://") + } + return false +} + // llbOp holds data for a single loaded LLB op type llbOp struct { Op *pb.Op diff --git a/scripts/buildinputs/go.mod b/scripts/buildinputs/go.mod index 0d50fe425b..347790d5b8 100644 --- a/scripts/buildinputs/go.mod +++ b/scripts/buildinputs/go.mod @@ -1,11 +1,13 @@ module dockerfile -go 1.24 +go 1.24.0 + +toolchain go1.24.5 require ( github.com/containerd/platforms v1.0.0-rc.1 github.com/google/go-cmp v0.7.0 - github.com/moby/buildkit v0.23.2 + github.com/moby/buildkit v0.24.0 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.1 github.com/pkg/errors v0.9.1 @@ -13,7 +15,7 @@ require ( require ( github.com/agext/levenshtein v1.2.3 // indirect - github.com/containerd/containerd/v2 v2.1.3 // indirect + github.com/containerd/containerd/v2 v2.1.4 // indirect github.com/containerd/errdefs v1.0.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/containerd/ttrpc v1.2.7 // indirect @@ -54,7 +56,7 @@ require ( golang.org/x/crypto v0.40.0 // indirect golang.org/x/net v0.42.0 // indirect golang.org/x/sync v0.16.0 // indirect - golang.org/x/sys v0.34.0 // indirect + golang.org/x/sys v0.36.0 // indirect golang.org/x/text v0.27.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250715232539-7130f93afb79 // indirect google.golang.org/grpc v1.74.0 // indirect diff --git a/scripts/buildinputs/go.sum b/scripts/buildinputs/go.sum index da927f22ee..a9405578e3 100644 --- a/scripts/buildinputs/go.sum +++ b/scripts/buildinputs/go.sum @@ -6,6 +6,8 @@ github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb h1:EDmT6Q9Zs+SbUo github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb/go.mod h1:ZjrT6AXHbDs86ZSdt/osfBi5qfexBrKUdONk989Wnk4= github.com/containerd/containerd/v2 v2.1.3 h1:eMD2SLcIQPdMlnlNF6fatlrlRLAeDaiGPGwmRKLZKNs= github.com/containerd/containerd/v2 v2.1.3/go.mod h1:8C5QV9djwsYDNhxfTCFjWtTBZrqjditQ4/ghHSYjnHM= +github.com/containerd/containerd/v2 v2.1.4 h1:/hXWjiSFd6ftrBOBGfAZ6T30LJcx1dBjdKEeI8xucKQ= +github.com/containerd/containerd/v2 v2.1.4/go.mod h1:8C5QV9djwsYDNhxfTCFjWtTBZrqjditQ4/ghHSYjnHM= github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= @@ -55,6 +57,8 @@ github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zt github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/moby/buildkit v0.23.2 h1:gt/dkfcpgTXKx+B9I310kV767hhVqTvEyxGgI3mqsGQ= github.com/moby/buildkit v0.23.2/go.mod h1:iEjAfPQKIuO+8y6OcInInvzqTMiKMbb2RdJz1K/95a0= +github.com/moby/buildkit v0.24.0 h1:qYfTl7W1SIJzWDIDCcPT8FboHIZCYfi++wvySi3eyFE= +github.com/moby/buildkit v0.24.0/go.mod h1:4qovICAdR2H4C7+EGMRva5zgHW1gyhT4/flHI7F5F9k= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= @@ -137,6 +141,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/scripts/sandbox.py b/scripts/sandbox.py index f7a9752e86..a8592b09d9 100755 --- a/scripts/sandbox.py +++ b/scripts/sandbox.py @@ -66,7 +66,7 @@ def buildinputs( stdout = subprocess.check_output([ROOT_DIR / "bin/buildinputs", str(dockerfile)], text=True, cwd=ROOT_DIR, env={"TARGETPLATFORM": platform, **os.environ}) - prereqs = [pathlib.Path(file) for file in json.loads(stdout)] if stdout != "\n" else [] + prereqs = [pathlib.Path(file) for file in json.loads(stdout)] print(f"{prereqs=}") return prereqs