Skip to content

Commit fab86e6

Browse files
authored
Merge pull request #125 from infosiftr/oci-validate
Implement more TODOs in `oci-validate` code
2 parents a116b91 + 0f2312b commit fab86e6

File tree

4 files changed

+184
-81
lines changed

4 files changed

+184
-81
lines changed

helpers/oci-import.sh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
9898
else . end
9999
100100
| .mediaType //= media_type_oci_index # TODO index normalize function? just force this to be set/valid instead?
101-
| validate_oci_index
101+
| validate_oci_index({ indexPlatformsOptional: true })
102102
| validate_length(.manifests; 1) # TODO allow upstream attestation in the future?
103103
104104
# purge maintainer-provided URLs / annotations (https://github.com/docker-library/bashbrew/blob/4e0ea8d8aba49d54daf22bd8415fabba65dc83ee/cmd/bashbrew/oci-builder.go#L146-L147)
@@ -123,7 +123,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
123123
$build
124124
| .source.arches[.build.arch].platform
125125
)
126-
# TODO .manifests[1].platform ?
127126
128127
# inject our build annotations
129128
| .manifests[0].annotations += (
@@ -136,8 +135,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
136135
' "$file" | tee index.json.new
137136
mv -f index.json.new index.json
138137

139-
# TODO "crane validate" is definitely interesting here -- it essentially validates all the descriptors recursively, including diff_ids, but it only supports "remote" or "tarball" (which refers to the *old* "docker save" tarball format), so isn't useful here, but we need to do basically that exact work
140-
141138
# now that "index.json" represents the exact index we want to push, let's push it down into a blob and make a new appropriate "index.json" for "crane push"
142139
# TODO we probably want/need some "traverse/manipulate an OCI layout" helpers 😭
143140
mediaType="$(jq --raw-output '.mediaType' index.json)"
@@ -159,3 +156,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --null-input --tab '
159156
}
160157
| normalize_manifest
161158
' > index.json
159+
160+
# TODO move this further out
161+
"$BASHBREW_META_SCRIPTS/helpers/oci-validate.sh" .

helpers/oci-sbom.sh

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ cd "$output"
3838
imageIndex="$(jq -L"$BASHBREW_META_SCRIPTS" --raw-output '
3939
include "oci";
4040
include "validate";
41-
validate_oci_index
41+
validate_oci_index({ indexPlatformsOptional: true })
4242
| validate_length(.manifests; 1)
4343
| validate_IN(.manifests[0].mediaType; media_types_index)
4444
| .manifests[0].digest
@@ -91,7 +91,7 @@ done
9191
sbomIndex="$(jq -L"$BASHBREW_META_SCRIPTS" --raw-output '
9292
include "oci";
9393
include "validate";
94-
validate_oci_index
94+
validate_oci_index({ indexPlatformsOptional: true })
9595
| validate_length(.manifests; 1)
9696
| validate_IN(.manifests[0].mediaType; media_types_index)
9797
| .manifests[0].digest
@@ -146,3 +146,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --null-input --tab '
146146
}
147147
| normalize_manifest
148148
' > index.json
149+
150+
# TODO move this further out
151+
"$BASHBREW_META_SCRIPTS/helpers/oci-validate.sh" .

helpers/oci-validate.sh

Lines changed: 104 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env bash
22
set -Eeuo pipefail
33

4-
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, some media types, layer diff_ids, etc)
4+
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, media types, layer diff_ids, etc)
55

66
layout="$1"; shift
77

@@ -23,85 +23,134 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp '
2323
| empty
2424
' oci-layout
2525

26-
# TODO this is all rubbish; it needs more thought (the jq functions it invokes are pretty solid now though)
26+
# TODO (recursively?) validate subject descriptors in here somewhere 🤔
27+
28+
# TODO handle objects that *only* exist in the "data" field too 🤔 https://github.com/docker-library/meta-scripts/pull/125#discussion_r2070633122
29+
# maybe descriptor takes a "--data" flag that then returns the input descriptor, but enhanced with a "data" field so the other functions can use that to extract the data instead of relying on files?
2730

2831
descriptor() {
2932
local file="$1"; shift # "blobs/sha256/xxx"
30-
echo "blob: $file"
31-
local digest="$1"; shift # "sha256:xxx"
32-
local size="$1"; shift # "123"
33-
local algo="${digest%%:*}" # sha256
34-
local hash="${digest#$algo:}" # xxx
35-
local diskSize
36-
[ "$algo" = 'sha256' ] # TODO error message
37-
diskSize="$(stat --dereference --format '%s' "$file")"
38-
[ "$size" = "$diskSize" ] # TODO error message
39-
"${algo}sum" <<<"$hash *$file" --check --quiet --strict -
33+
local desc; desc="$(cat)"
34+
local shell
35+
shell="$(jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
36+
include "validate";
37+
include "oci";
38+
validate_one
39+
| validate_oci_descriptor
40+
| (
41+
@sh "local algo=\(
42+
.digest
43+
| split(":")[0]
44+
| validate_IN(.; "sha256", "sha512") # TODO more algorithms? need more tools on the host
45+
)",
46+
47+
@sh "local data=\(
48+
if has("data") then
49+
.data
50+
else " " end # empty string is valid base64 (which we should validate), but spaces are not, so we can use a single space to detect "data not set"
51+
)",
52+
53+
empty
54+
)
55+
')"
56+
eval "$shell"
57+
local digest size dataDigest= dataSize=
58+
digest="$("${algo}sum" "$file" | cut -d' ' -f1)"
59+
digest="$algo:$digest"
60+
size="$(stat --dereference --format '%s' "$file")"
61+
if [ "$data" != ' ' ]; then
62+
dataDigest="$(base64 <<<"$data" -d | "${algo}sum" | cut -d' ' -f1)"
63+
dataDigest="$algo:$dataDigest"
64+
dataSize="$(base64 <<<"$data" -d | wc --bytes)"
65+
# TODO *technically* we could get clever here and pass `base64 -d` to something like `tee >(wc --bytes) >(dig="$(sha256sum | cut -d' ' -f1)" && echo "sha256:$dig" && false) > /dev/null` to avoid parsing the base64 twice, but then failure cases are less likely to be caught, so it's safer to simply redecode (and we can't decode into a variable because this might be binary data *and* bash will do newline munging in both directions)
66+
fi
67+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --arg digest "$digest" --arg size "$size" --arg dataDigest "$dataDigest" --arg dataSize "$dataSize" '
68+
include "validate";
69+
validate_one
70+
| validate_IN(.digest; $digest)
71+
| validate_IN(.size; $size | tonumber)
72+
| if has("data") then
73+
validate(.data;
74+
$digest == $dataDigest
75+
and $size == $dataSize
76+
; "(decoded) data has size \($dataSize) and digest \($dataDigest) (expected \($size) and \($digest))")
77+
else . end
78+
| empty
79+
'
4080
}
4181

42-
images() {
43-
echo "image: $*"
82+
# TODO validate config (diff_ids, history, platform - gotta carry *two* levels of descriptors for that, and decompress all the layers 🙊)
83+
# TODO validate provenance/SBOM layer contents?
84+
85+
image() {
86+
local file="$1"; shift
87+
echo "image: $file"
88+
local desc; desc="$(cat)"
89+
descriptor <<<"$desc" "$file"
4490
local shell
4591
shell="$(
46-
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
92+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
4793
include "validate";
4894
include "oci";
49-
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
50-
validate_length(.; $expected | tonumber)
51-
| map(validate_oci_image)
95+
validate_length(.; 2)
96+
| .[0] as $desc
97+
| .[1]
98+
| validate_oci_image({
99+
imageAttestation: IN($desc.annotations["vnd.docker.reference.type"]; "attestation-manifest"),
100+
})
101+
| if $desc then
102+
validate_IN(.mediaType; $desc.mediaType)
103+
| validate_IN(.artifactType; $desc.artifactType)
104+
else . end
52105
| (
53106
(
54-
.[].config, .[].layers[]
55-
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
56-
# TODO data?
107+
.config, .layers[]
108+
| @sh "descriptor <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
57109
),
58110
59111
empty # trailing comma
60112
)
61-
' "$@"
113+
' /dev/stdin "$file"
62114
)"
63115
eval "$shell"
64116
}
65117

66-
# TODO pass descriptor values down so we can validate that they match (.mediaType, .artifactType, .platform across *two* levels index->manifest->config), similar to .data
67-
# TODO disallow urls completely?
68-
69-
indexes() {
70-
echo "index: $*"
118+
index() {
119+
local file="$1"; shift
120+
echo "index: $file"
121+
local desc; desc="$(cat)"
122+
if [ "$desc" != 'null' ]; then
123+
descriptor <<<"$desc" "$file"
124+
fi
71125
local shell
72126
shell="$(
73-
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
127+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
74128
include "validate";
75129
include "oci";
76-
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
77-
validate_length(.; $expected | tonumber)
78-
| map(validate_oci_index)
130+
validate_length(.; 2)
131+
| .[0] as $desc
132+
| .[1]
133+
| validate_oci_index({
134+
indexPlatformsOptional: (input_filename == "index.json"),
135+
})
136+
| if $desc then
137+
validate_IN(.mediaType; $desc.mediaType)
138+
| validate_IN(.artifactType; $desc.artifactType)
139+
else . end
140+
| .manifests[]
79141
| (
80-
(
81-
.[].manifests[]
82-
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
83-
# TODO data?
84-
),
85-
86-
(
87-
[ .[].manifests[] | select(IN(.mediaType; media_types_image)) | .digest ]
88-
| if length > 0 then
89-
"images \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
90-
else empty end
91-
),
92-
93-
(
94-
[ .[].manifests[] | select(IN(.mediaType; media_types_index)) | .digest ]
95-
| if length > 0 then
96-
"indexes \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
97-
else empty end
98-
),
99-
100-
empty # trailing comma
101-
)
102-
' "$@"
142+
.mediaType
143+
| if IN(media_types_index) then
144+
"index"
145+
elif IN(media_types_image) then
146+
"image"
147+
else
148+
error("UNSUPPORTED MEDIA TYPE: \(.)")
149+
end
150+
) + @sh " <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
151+
' /dev/stdin "$file"
103152
)"
104153
eval "$shell"
105154
}
106155

107-
indexes index.json
156+
index <<<'null' index.json

0 commit comments

Comments
 (0)