Skip to content

Commit 1d19340

Browse files
committed
Implement more TODOs in oci-validate code
I'm much happier with this interface now -- it can properly handle an image with attestations now, for example.
1 parent 2b04d9e commit 1d19340

File tree

2 files changed

+162
-69
lines changed

2 files changed

+162
-69
lines changed

helpers/oci-validate.sh

Lines changed: 101 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env bash
22
set -Eeuo pipefail
33

4-
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, some media types, layer diff_ids, etc)
4+
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, media types, layer diff_ids, etc)
55

66
layout="$1"; shift
77

@@ -23,85 +23,131 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp '
2323
| empty
2424
' oci-layout
2525

26-
# TODO this is all rubbish; it needs more thought (the jq functions it invokes are pretty solid now though)
26+
# TODO (recursively?) validate subject descriptors in here somewhere 🤔
2727

2828
descriptor() {
2929
local file="$1"; shift # "blobs/sha256/xxx"
30-
echo "blob: $file"
31-
local digest="$1"; shift # "sha256:xxx"
32-
local size="$1"; shift # "123"
33-
local algo="${digest%%:*}" # sha256
34-
local hash="${digest#$algo:}" # xxx
35-
local diskSize
36-
[ "$algo" = 'sha256' ] # TODO error message
37-
diskSize="$(stat --dereference --format '%s' "$file")"
38-
[ "$size" = "$diskSize" ] # TODO error message
39-
"${algo}sum" <<<"$hash *$file" --check --quiet --strict -
30+
local desc; desc="$(cat)"
31+
local shell
32+
shell="$(jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
33+
include "validate";
34+
include "oci";
35+
validate_one
36+
| validate_oci_descriptor
37+
| (
38+
@sh "local algo=\(
39+
.digest
40+
| split(":")[0]
41+
| validate_IN(.; "sha256", "sha512") # TODO more algorithms? need more tools on the host
42+
)",
43+
44+
@sh "local data=\(
45+
if has("data") then
46+
.data
47+
else " " end # empty string is valid base64 (which we should validate), but spaces are not, so we can use a single space to detect "data not set"
48+
)",
49+
50+
empty
51+
)
52+
')"
53+
eval "$shell"
54+
local digest size dataDigest= dataSize=
55+
digest="$("${algo}sum" "$file" | cut -d' ' -f1)"
56+
digest="$algo:$digest"
57+
size="$(stat --dereference --format '%s' "$file")"
58+
if [ "$data" != ' ' ]; then
59+
dataDigest="$(base64 <<<"$data" -d | "${algo}sum" | cut -d' ' -f1)"
60+
dataDigest="$algo:$dataDigest"
61+
dataSize="$(base64 <<<"$data" -d | wc --bytes)"
62+
# TODO *technically* we could get clever here and pass `base64 -d` to something like `tee >(wc --bytes) >(dig="$(sha256sum | cut -d' ' -f1)" && echo "sha256:$dig" && false) > /dev/null` to avoid parsing the base64 twice, but then failure cases are less likely to be caught, so it's safer to simply redecode (and we can't decode into a variable because this might be binary data *and* bash will do newline munging in both directions)
63+
fi
64+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --arg digest "$digest" --arg size "$size" --arg dataDigest "$dataDigest" --arg dataSize "$dataSize" '
65+
include "validate";
66+
validate_one
67+
| validate_IN(.digest; $digest)
68+
| validate_IN(.size; $size | tonumber)
69+
| if has("data") then
70+
validate(.data;
71+
$digest == $dataDigest
72+
and $size == $dataSize
73+
; "(decoded) data has size \($dataSize) and digest \($dataDigest) (expected \($size) and \($digest))")
74+
else . end
75+
| empty
76+
'
4077
}
4178

42-
images() {
43-
echo "image: $*"
79+
# TODO validate config (diff_ids, history, platform - gotta carry *two* levels of descriptors for that, and decompress all the layers 🙊)
80+
# TODO validate provenance/SBOM layer contents?
81+
82+
image() {
83+
local file="$1"; shift
84+
echo "image: $file"
85+
local desc; desc="$(cat)"
86+
descriptor <<<"$desc" "$file"
4487
local shell
4588
shell="$(
46-
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
89+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
4790
include "validate";
4891
include "oci";
49-
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
50-
validate_length(.; $expected | tonumber)
51-
| map(validate_oci_image)
92+
validate_length(.; 2)
93+
| .[0] as $desc
94+
| .[1]
95+
| validate_oci_image({
96+
imageAttestation: IN($desc.annotations["vnd.docker.reference.type"]; "attestation-manifest"),
97+
})
98+
| if $desc then
99+
validate_IN(.mediaType; $desc.mediaType)
100+
| validate_IN(.artifactType; $desc.artifactType)
101+
else . end
52102
| (
53103
(
54-
.[].config, .[].layers[]
55-
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
56-
# TODO data?
104+
.config, .layers[]
105+
| @sh "descriptor <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
57106
),
58107
59108
empty # trailing comma
60109
)
61-
' "$@"
110+
' /dev/stdin "$file"
62111
)"
63112
eval "$shell"
64113
}
65114

66-
# TODO pass descriptor values down so we can validate that they match (.mediaType, .artifactType, .platform across *two* levels index->manifest->config), similar to .data
67-
# TODO disallow urls completely?
68-
69-
indexes() {
70-
echo "index: $*"
115+
index() {
116+
local file="$1"; shift
117+
echo "index: $file"
118+
local desc; desc="$(cat)"
119+
if [ "$desc" != 'null' ]; then
120+
descriptor <<<"$desc" "$file"
121+
fi
71122
local shell
72123
shell="$(
73-
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
124+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
74125
include "validate";
75126
include "oci";
76-
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
77-
validate_length(.; $expected | tonumber)
78-
| map(validate_oci_index)
127+
validate_length(.; 2)
128+
| .[0] as $desc
129+
| .[1]
130+
| validate_oci_index({
131+
indexPlatformsOptional: (input_filename == "index.json"),
132+
})
133+
| if $desc then
134+
validate_IN(.mediaType; $desc.mediaType)
135+
| validate_IN(.artifactType; $desc.artifactType)
136+
else . end
137+
| .manifests[]
79138
| (
80-
(
81-
.[].manifests[]
82-
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
83-
# TODO data?
84-
),
85-
86-
(
87-
[ .[].manifests[] | select(IN(.mediaType; media_types_image)) | .digest ]
88-
| if length > 0 then
89-
"images \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
90-
else empty end
91-
),
92-
93-
(
94-
[ .[].manifests[] | select(IN(.mediaType; media_types_index)) | .digest ]
95-
| if length > 0 then
96-
"indexes \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
97-
else empty end
98-
),
99-
100-
empty # trailing comma
101-
)
102-
' "$@"
139+
.mediaType
140+
| if IN(media_types_index) then
141+
"index"
142+
elif IN(media_types_image) then
143+
"image"
144+
else
145+
error("UNSUPPORTED MEDIA TYPE: \(.)")
146+
end
147+
) + @sh " <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
148+
' /dev/stdin "$file"
103149
)"
104150
eval "$shell"
105151
}
106152

107-
indexes index.json
153+
index <<<'null' index.json

oci.jq

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ def validate_oci_digest:
165165
| {
166166
"sha256": [ 64 ],
167167
"sha512": [ 128 ],
168+
# TODO "blake3": [ 64 ], # https://github.com/opencontainers/image-spec/pull/1240
168169
} as $lengths
169170
| validate_IN($dig.algorithm; $lengths | keys[])
170171
| validate_length($dig.encoded; $lengths[$dig.algorithm][])
@@ -191,7 +192,11 @@ def validate_oci_descriptor:
191192
| validate(.size; . == floor; "size must be whole")
192193
| validate(.size; . == ceil; "size must be whole")
193194

194-
# TODO urls?
195+
| if has("urls") then
196+
validate(.urls; type == "array")
197+
| validate(.urls[]; type == "string")
198+
| validate_length(.urls; 0) # TODO this intentionally contradicts the above lines -- are there cases where we should allow urls?
199+
else . end
195200

196201
| validate_oci_annotations_haver
197202

@@ -205,7 +210,9 @@ def validate_oci_descriptor:
205210
# someday, maybe we can validate that .data matches .digest here (needs more jq functionality, including and especially the ability to deal with non-UTF8 binary data from base64 and perform sha256 over it)
206211
else . end
207212

208-
# TODO artifactType?
213+
| if has("artifactType") then
214+
validate(.artifactType; type == "string")
215+
else . end
209216

210217
# https://github.com/opencontainers/image-spec/blob/v1.1.1/image-index.md#image-index-property-descriptions
211218
| if has("platform") then
@@ -240,47 +247,87 @@ def validate_oci_subject_haver:
240247
;
241248

242249
# https://github.com/opencontainers/image-spec/blob/v1.1.1/image-index.md
243-
def validate_oci_index:
250+
def validate_oci_index($opt):
244251
validate_IN(type; "object")
245252
| validate_IN(.schemaVersion; 2)
246-
| validate_IN(.mediaType; media_types_index) # TODO allow "null" here too? (https://github.com/opencontainers/image-spec/security/advisories/GHSA-77vh-xpmg-72qh)
247-
# TODO artifactType?
253+
| validate_IN(.mediaType; media_types_index)
254+
| if has("artifactType") then
255+
validate(.artifactType; type == "string")
256+
| validate_IN(.artifactType; null) # TODO acceptable values? (this check intentionally contradicts the one above so artifactType generates an error)
257+
else . end
248258
| validate(.manifests[];
249259
validate_oci_descriptor
250260
| validate_IN(.mediaType; media_types_index, media_types_image)
251261
| validate(.size; . > 2; "manifest size must be at *least* big enough for {} plus *some* content")
252262
# https://github.com/opencontainers/distribution-spec/pull/293#issuecomment-1452780554
253263
| validate(.size; . <= 4 * 1024 * 1024; "manifest size must be 4MiB (\(4 * 1024 * 1024)) or less")
264+
265+
# slightly stricter enforcement than "validate_oci_descriptor" by default
266+
| if $opt.indexPlatformsOptional then . else
267+
validate(.platform; type == "object")
268+
end
269+
270+
# https://github.com/moby/buildkit/blob/c6145c2423de48f891862ac02f9b2653864d3c9e/docs/attestations/attestation-storage.md
271+
| if .annotations | has("vnd.docker.reference.type") or has("vnd.docker.reference.digest") then
272+
validate_IN(.mediaType; media_type_oci_image)
273+
| validate_IN(.artifactType; null, "application/vnd.docker.attestation.manifest.v1+json") # https://github.com/moby/buildkit/pull/5573/files#r2069525281
274+
| validate_IN(.annotations["vnd.docker.reference.type"]; "attestation-manifest")
275+
| validate(.annotations["vnd.docker.reference.digest"]; validate_oci_digest)
276+
| validate_IN(.platform.os; "unknown")
277+
| validate_IN(.platform.architecture; "unknown")
278+
else
279+
validate_IN(.artifactType; null)
280+
end
254281
)
255-
# TODO if any manifest has "vnd.docker.reference.digest", validate the subject exists in the list
282+
| if any(.manifests[].annotations; has("vnd.docker.reference.digest")) then
283+
[ .manifests[].digest ] as $digests
284+
| validate_IN(.manifests[].annotations["vnd.docker.reference.digest"]; null, $digests[])
285+
else . end
256286
| validate_oci_subject_haver
257287
| validate_oci_annotations_haver
258288
;
289+
def validate_oci_index: validate_oci_index({});
259290

260291
# https://github.com/opencontainers/image-spec/blob/v1.1.1/manifest.md
261-
def validate_oci_image:
292+
def validate_oci_image($opt):
262293
validate_IN(type; "object")
263294
| validate_IN(.schemaVersion; 2)
264-
| validate_IN(.mediaType; media_types_image) # TODO allow "null" here too? (https://github.com/opencontainers/image-spec/security/advisories/GHSA-77vh-xpmg-72qh)
265-
# TODO artifactType (but only selectively / certain values)
295+
| validate_IN(.mediaType; media_types_image)
296+
| if has("artifactType") then
297+
validate(.artifactType; type == "string")
298+
| validate_IN(.artifactType;
299+
if $opt.imageAttestation then
300+
"application/vnd.docker.attestation.manifest.v1+json" # https://github.com/moby/buildkit/pull/5573/files#r2069525281
301+
else null end # (this check intentionally contradicts the one above so artifactType normally generates an error)
302+
)
303+
else . end
266304
| validate(.config;
267305
validate_oci_descriptor
268306
| validate(.size; . >= 2; "config must be at *least* big enough for {}")
269307
| validate_IN(.mediaType; media_types_config)
308+
| validate_IN(.artifactType; null)
270309
)
271310
| validate(.layers[];
272311
validate_oci_descriptor
273-
| validate_IN(.mediaType; media_types_layer) # TODO allow "application/vnd.in-toto+json" and friends, but selectively 🤔
312+
| if $opt.imageAttestation then
313+
# https://github.com/moby/buildkit/blob/c6145c2423de48f891862ac02f9b2653864d3c9e/docs/attestations/attestation-storage.md
314+
validate_IN(.mediaType; "application/vnd.in-toto+json")
315+
| validate_IN(.annotations["in-toto.io/predicate-type"];
316+
"https://slsa.dev/provenance/v0.2",
317+
"https://spdx.dev/Document",
318+
empty # trailing comma
319+
)
320+
else
321+
validate_IN(.mediaType; media_types_layer)
322+
end
323+
| validate_IN(.artifactType; null)
274324
)
275325
| validate_oci_subject_haver
276326
| validate_oci_annotations_haver
277327
;
328+
def validate_oci_image: validate_oci_image({});
278329

279330
# https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md#oci-layout-file
280331
def validate_oci_layout_file:
281332
validate_IN(.imageLayoutVersion; "1.0.0")
282333
;
283-
284-
# TODO validate digest, size of blobs (*somewhere*, probably not here - this is all "cheap" validations / version+ordering+format assumption validations)
285-
# TODO if .data, validate that somehow too (size, digest); https://github.com/jqlang/jq/issues/1116#issuecomment-2515814615
286-
# TODO also we should validate that the length of every/any manifest is <= 4MiB (https://github.com/opencontainers/distribution-spec/pull/293#issuecomment-1452780554)

0 commit comments

Comments
 (0)