Skip to content

Commit 594efa5

Browse files
Implement verification of Merkle inclusion proof (#38)
* sigstore: Initial implementation of verify command * _verify, _cli: Check the cert for the signer email * _verify: Add proper checks * setup: Add PyOpenSSL as a dependency * _verify: Remove redundant newline * Fix lint * Fix comment formatting * Return None for consistency * Use click files instead of paths and minor fixes * treewide: embed key material, refactor to accommodate * gitignore: only ignore junk in the root * sigstore: add fulcio root * Format data correctly for Rekor endpoint * Fix Rekor API calls in verification * Fix type checking * Use the updated Fulcio root key * Use CTFE key as a file properly and fix type hints * Add more progress feedback * Use pydantic and remove custom `from_dict` helper * Add type hints to verify API * Add pydantic to setup * merkle, _verify: Initial attempt at inclusion proof verification * merkle: Get root calculation working * Cleanup * Verify all matching entries * merkle: Documentation * Formatting Co-authored-by: William Woodruff <[email protected]>
1 parent 1e836ad commit 594efa5

File tree

3 files changed

+141
-5
lines changed

3 files changed

+141
-5
lines changed

sigstore/_internal/merkle.py

+108-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,115 @@
11
"""
22
Utilities for verifying proof-of-inclusion within Rekor's Merkle Tree.
3+
4+
This code is based on Google's Trillian Merkle Tree implementation, which Cosign uses to validate
5+
Rekor entries.
6+
7+
The data format for the Merkle tree nodes is described in IETF's RFC 6962.
38
"""
49

5-
from sigstore._internal.rekor import RekorInclusionProof
10+
import base64
11+
import hashlib
12+
import struct
13+
from typing import List, Tuple
614

15+
from sigstore._internal.rekor import RekorEntry, RekorInclusionProof
716

8-
def verify_merkle_inclusion(inclusion_proof: RekorInclusionProof) -> None:
17+
18+
class InvalidInclusionProofError(Exception):
    """Raised when a Merkle inclusion proof fails verification."""
20+
21+
22+
# Single byte that _hash_leaf prepends to the leaf data before hashing it.
LEAF_HASH_PREFIX = 0
23+
# Single byte that _hash_children prepends to the two child hashes before hashing them.
NODE_HASH_PREFIX = 1
24+
25+
26+
def _decomp_inclusion_proof(index: int, size: int) -> Tuple[int, int]:
27+
"""
28+
Breaks down inclusion proof for a leaf at the specified |index| in a tree of the specified
29+
|size| into 2 components. The splitting point between them is where paths to leaves |index| and
30+
|size-1| diverge.
31+
32+
Returns lengths of the bottom and upper proof parts correspondingly. The sum of the two
33+
determines the correct length of the inclusion proof.
34+
"""
35+
36+
inner = (index ^ (size - 1)).bit_length()
37+
border = bin(index >> inner).count("1")
38+
return inner, border
39+
40+
41+
def _chain_inner(seed: bytes, hashes: List[str], log_index: int) -> bytes:
42+
"""
43+
Computes a subtree hash for a node on or below the tree's right border. Assumes |proof| hashes
44+
are ordered from lower levels to upper, and |seed| is the initial subtree/leaf hash on the path
45+
located at the specified |index| on its level.
46+
"""
47+
48+
for i in range(len(hashes)):
49+
h = bytes.fromhex(hashes[i])
50+
if (log_index >> i) & 1 == 0:
51+
seed = _hash_children(seed, h)
52+
else:
53+
seed = _hash_children(h, seed)
54+
return seed
55+
56+
57+
def _chain_border_right(seed: bytes, hashes: List[str]) -> bytes:
58+
"""
59+
Chains proof hashes along tree borders. This differs from inner chaining because |proof|
60+
contains only left-side subtree hashes.
61+
"""
62+
63+
for h in hashes:
64+
seed = _hash_children(bytes.fromhex(h), seed)
65+
return seed
66+
67+
68+
def _hash_children(lhs: bytes, rhs: bytes) -> bytes:
    """
    Computes the hash of an interior node from its two children: the SHA-256 digest of the
    single byte NODE_HASH_PREFIX followed by |lhs| and then |rhs|.
    """

    # "B" packs the prefix as one unsigned byte; each "<n>s" copies a child's bytes verbatim.
    pattern = f"B{len(lhs)}s{len(rhs)}s"
    data = struct.pack(pattern, NODE_HASH_PREFIX, lhs, rhs)
    return hashlib.sha256(data).digest()
72+
73+
74+
def _hash_leaf(leaf: bytes) -> bytes:
    """
    Computes the hash of a leaf: the SHA-256 digest of the single byte LEAF_HASH_PREFIX
    followed by the |leaf| data.
    """

    # "B" packs the prefix as one unsigned byte; "<n>s" copies the leaf bytes verbatim.
    pattern = f"B{len(leaf)}s"
    data = struct.pack(pattern, LEAF_HASH_PREFIX, leaf)
    return hashlib.sha256(data).digest()
78+
79+
80+
def verify_merkle_inclusion(
    inclusion_proof: RekorInclusionProof, entry: RekorEntry
) -> None:
    """
    Verify the Merkle Inclusion Proof for a given Rekor entry.

    Recomputes the root hash from the hash of the entry's base64-decoded body and the hashes
    carried in |inclusion_proof|, then compares it with the proof's own root hash.

    Raises InvalidInclusionProofError if the number of hashes does not match what the proof's
    log index and tree size require, or if the recomputed root hash differs from the proof's.
    """

    # Figure out which subset of hashes corresponds to the inner and border nodes.
    inner, border = _decomp_inclusion_proof(
        inclusion_proof.log_index, inclusion_proof.tree_size
    )

    # Check against the number of hashes.
    if len(inclusion_proof.hashes) != (inner + border):
        raise InvalidInclusionProofError(
            f"Inclusion proof has wrong size: expected {inner + border}, got "
            f"{len(inclusion_proof.hashes)}"
        )

    # The new entry's hash isn't included in the inclusion proof so we should calculate this
    # ourselves.
    leaf_hash: bytes = _hash_leaf(base64.b64decode(entry.body))

    # Now chain the hashes belonging to the inner and border portions. We should expect the
    # calculated hash to match the root hash.
    intermediate_result: bytes = _chain_inner(
        leaf_hash, inclusion_proof.hashes[:inner], inclusion_proof.log_index
    )

    calc_hash: str = _chain_border_right(
        intermediate_result, inclusion_proof.hashes[inner:]
    ).hex()

    if calc_hash != inclusion_proof.root_hash:
        # Report the expected root hash itself, not the whole proof object.
        raise InvalidInclusionProofError(
            f"Inclusion proof contains invalid root hash: expected {inclusion_proof.root_hash}, "
            f"calculated {calc_hash}"
        )

sigstore/_internal/rekor/_client.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from urllib.parse import urljoin
1212

1313
import requests
14-
from pydantic import BaseModel, Field
14+
from pydantic import BaseModel, Field, validator
1515

1616
DEFAULT_REKOR_URL = "https://rekor.sigstore.dev/api/v1/"
1717

@@ -52,6 +52,27 @@ class RekorInclusionProof(BaseModel):
5252
tree_size: int = Field(..., alias="treeSize")
5353
hashes: List[str] = Field(..., alias="hashes")
5454

55+
@validator("log_index")
def log_index_positive(cls, v):
    """Reject a negative log index; zero is accepted. Returns the value unchanged."""
    if v < 0:
        raise ValueError(f"Inclusion proof has invalid log index: {v} < 0")
    return v
60+
61+
@validator("tree_size")
def tree_size_positive(cls, v):
    """Reject a negative tree size. Returns the value unchanged."""
    if v < 0:
        raise ValueError(f"Inclusion proof has invalid tree size: {v} < 0")
    return v
66+
67+
@validator("tree_size")
def log_index_within_tree_size(cls, v, values, **kwargs):
    """
    Reject a tree size that is less than or equal to the log index. Returns the value
    unchanged.
    """
    # pydantic leaves a field out of |values| when that field failed its own validation.
    # In that case the log index error is already being reported, so skip the comparison
    # instead of failing with a KeyError.
    log_index = values.get("log_index")
    if log_index is not None and v <= log_index:
        raise ValueError(
            "Inclusion proof has log index greater than or equal to tree size: "
            f"{v} <= {log_index}"
        )
    return v
75+
5576

5677
class RekorClientError(Exception):
5778
pass

sigstore/_verify.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
from cryptography.x509.oid import ExtendedKeyUsageOID
2020
from OpenSSL.crypto import X509, X509Store, X509StoreContext
2121

22-
from sigstore._internal.merkle import verify_merkle_inclusion
22+
from sigstore._internal.merkle import (
23+
InvalidInclusionProofError,
24+
verify_merkle_inclusion,
25+
)
2326
from sigstore._internal.rekor import (
2427
RekorClient,
2528
RekorEntry,
@@ -139,7 +142,13 @@ def verify(
139142
inclusion_proof = RekorInclusionProof.parse_obj(
140143
entry.verification.get("inclusionProof")
141144
)
142-
verify_merkle_inclusion(inclusion_proof)
145+
try:
146+
verify_merkle_inclusion(inclusion_proof, entry)
147+
except InvalidInclusionProofError as inval_inclusion_proof:
148+
output(
149+
f"Failed to validate Rekor entry's inclusion proof: {inval_inclusion_proof}"
150+
)
151+
continue
143152

144153
# 5) Verify the Signed Entry Timestamp (SET) supplied by Rekor for this artifact
145154
try:

0 commit comments

Comments
 (0)