Skip to content

Commit 3521fdb

Browse files
codexByron
andcommitted
fix!: Limit Commit and Tag parsing to a given gix_hash::Kind
Doing so adds conformity with Git, but also simplifies the parser which now only parse hex-hashes of a single valid length. Co-authored-by: Sebastian Thiel <sebastian.thiel@icloud.com>
1 parent 5127973 commit 3521fdb

20 files changed

Lines changed: 385 additions & 202 deletions

File tree

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-object/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ gix-hashtable = { version = "^0.14.0", path = "../gix-hashtable" }
4545
gix-validate = { version = "^0.11.1", path = "../gix-validate" }
4646
gix-actor = { version = "^0.40.1", path = "../gix-actor" }
4747
gix-date = { version = "^0.15.2", path = "../gix-date" }
48-
gix-error = { version = "^0.2.2", path = "../gix-error" }
4948
gix-utils = { version = "^0.3.2", path = "../gix-utils" }
5049

5150
itoa = "1.0.17"

gix-object/benches/decode_objects.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,29 @@ use std::hint::black_box;
33

44
fn parse_commit(c: &mut Criterion) {
55
c.bench_function("CommitRef(sig)", |b| {
6-
b.iter(|| black_box(gix_object::CommitRef::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS)).unwrap());
6+
b.iter(|| {
7+
black_box(gix_object::CommitRef::from_bytes(
8+
COMMIT_WITH_MULTI_LINE_HEADERS,
9+
gix_hash::Kind::Sha1,
10+
))
11+
.unwrap()
12+
});
713
});
814
c.bench_function("CommitRefIter(sig)", |b| {
9-
b.iter(|| black_box(gix_object::CommitRefIter::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS).count()));
15+
b.iter(|| {
16+
black_box(
17+
gix_object::CommitRefIter::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS, gix_hash::Kind::Sha1).count(),
18+
)
19+
});
1020
});
1121
}
1222

1323
fn parse_tag(c: &mut Criterion) {
1424
c.bench_function("TagRef(sig)", |b| {
15-
b.iter(|| black_box(gix_object::TagRef::from_bytes(TAG_WITH_SIGNATURE)).unwrap());
25+
b.iter(|| black_box(gix_object::TagRef::from_bytes(TAG_WITH_SIGNATURE, gix_hash::Kind::Sha1)).unwrap());
1626
});
1727
c.bench_function("TagRefIter(sig)", |b| {
18-
b.iter(|| black_box(gix_object::TagRefIter::from_bytes(TAG_WITH_SIGNATURE).count()));
28+
b.iter(|| black_box(gix_object::TagRefIter::from_bytes(TAG_WITH_SIGNATURE, gix_hash::Kind::Sha1).count()));
1929
});
2030
}
2131

gix-object/fuzz/fuzz_targets/fuzz_commit.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ use libfuzzer_sys::fuzz_target;
33
use std::hint::black_box;
44

55
fuzz_target!(|commit: &[u8]| {
6-
_ = black_box(gix_object::CommitRef::from_bytes(commit));
7-
_ = black_box(gix_object::CommitRefIter::from_bytes(commit)).count();
6+
_ = black_box(gix_object::CommitRef::from_bytes(commit, gix_hash::Kind::Sha1));
7+
_ = black_box(gix_object::CommitRefIter::from_bytes(commit, gix_hash::Kind::Sha1)).count();
8+
_ = black_box(gix_object::CommitRef::from_bytes(commit, gix_hash::Kind::Sha256));
9+
_ = black_box(gix_object::CommitRefIter::from_bytes(commit, gix_hash::Kind::Sha256)).count();
810
});

gix-object/fuzz/fuzz_targets/fuzz_tag.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ use libfuzzer_sys::fuzz_target;
44
use std::hint::black_box;
55

66
fuzz_target!(|tag: &[u8]| {
7-
_ = black_box(gix_object::TagRef::from_bytes(tag));
8-
_ = black_box(gix_object::TagRefIter::from_bytes(tag).count());
7+
_ = black_box(gix_object::TagRef::from_bytes(tag, gix_hash::Kind::Sha1));
8+
_ = black_box(gix_object::TagRefIter::from_bytes(tag, gix_hash::Kind::Sha1).count());
9+
_ = black_box(gix_object::TagRef::from_bytes(tag, gix_hash::Kind::Sha256));
10+
_ = black_box(gix_object::TagRefIter::from_bytes(tag, gix_hash::Kind::Sha256).count());
911
});

gix-object/src/commit/decode.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@ pub fn message<'a>(i: &mut &'a [u8]) -> ParseResult<&'a BStr> {
3737
/// This parser is not transactional as a whole: if a later required field or
3838
/// the final message parse fails, `i` may already have been advanced past
3939
/// earlier successfully parsed fields.
40-
pub fn commit<'a>(i: &mut &'a [u8]) -> ParseResult<CommitRef<'a>> {
41-
let tree = parse::header_field(i, b"tree", parse::hex_hash)?;
40+
pub fn commit<'a>(i: &mut &'a [u8], hash_kind: gix_hash::Kind) -> ParseResult<CommitRef<'a>> {
41+
let tree = parse::header_field(i, b"tree", |value| parse::hex_hash(value, hash_kind))?;
4242

4343
let mut parents = SmallVec::new();
4444
loop {
4545
let before = *i;
46-
match parse::header_field(i, b"parent", parse::hex_hash) {
46+
match parse::header_field(i, b"parent", |value| parse::hex_hash(value, hash_kind)) {
4747
Ok(parent) => parents.push(parent),
4848
Err(_) => {
4949
*i = before;

gix-object/src/commit/mod.rs

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,11 @@ mod write;
6262

6363
/// Lifecycle
6464
impl<'a> CommitRef<'a> {
65-
/// Deserialize a commit from the given `data` bytes while avoiding most allocations.
66-
pub fn from_bytes(mut data: &'a [u8]) -> Result<CommitRef<'a>, crate::decode::Error> {
65+
/// Deserialize a commit from the given `data` bytes while avoiding most allocations, using `hash_kind` to know
66+
/// what kind of hash to expect for validation.
67+
pub fn from_bytes(mut data: &'a [u8], hash_kind: gix_hash::Kind) -> Result<CommitRef<'a>, crate::decode::Error> {
6768
let input = &mut data;
68-
match decode::commit(input) {
69+
match decode::commit(input, hash_kind) {
6970
Ok(tag) => Ok(tag),
7071
Err(err) => Err(err),
7172
}
@@ -88,7 +89,10 @@ impl<'a> CommitRef<'a> {
8889

8990
/// Returns a convenient iterator over all extra headers.
9091
pub fn extra_headers(&self) -> ExtraHeaders<impl Iterator<Item = (&BStr, &BStr)>> {
91-
ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref())))
92+
ExtraHeaders::new(
93+
self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref())),
94+
self.tree().kind(),
95+
)
9296
}
9397

9498
/// Return the author, with whitespace trimmed.
@@ -132,13 +136,17 @@ impl CommitRef<'_> {
132136
impl Commit {
133137
/// Returns a convenient iterator over all extra headers.
134138
pub fn extra_headers(&self) -> ExtraHeaders<impl Iterator<Item = (&BStr, &BStr)>> {
135-
ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (k.as_bstr(), v.as_bstr())))
139+
ExtraHeaders::new(
140+
self.extra_headers.iter().map(|(k, v)| (k.as_bstr(), v.as_bstr())),
141+
self.tree.kind(),
142+
)
136143
}
137144
}
138145

139146
/// An iterator over extra headers in [owned][crate::Commit] and [borrowed][crate::CommitRef] commits.
140147
pub struct ExtraHeaders<I> {
141148
inner: I,
149+
hash_kind: gix_hash::Kind,
142150
}
143151

144152
/// Instantiation and convenience.
@@ -147,8 +155,8 @@ where
147155
I: Iterator<Item = (&'a BStr, &'a BStr)>,
148156
{
149157
/// Create a new instance from an iterator over tuples of (name, value) pairs.
150-
pub fn new(iter: I) -> Self {
151-
ExtraHeaders { inner: iter }
158+
pub fn new(iter: I, hash_kind: gix_hash::Kind) -> Self {
159+
ExtraHeaders { inner: iter, hash_kind }
152160
}
153161

154162
/// Find the _value_ of the _first_ header with the given `name`.
@@ -175,7 +183,8 @@ where
175183
/// A merge tag is a tag object embedded within the respective header field of a commit, making
176184
/// it a child object of sorts.
177185
pub fn mergetags(self) -> impl Iterator<Item = Result<TagRef<'a>, crate::decode::Error>> {
178-
self.find_all("mergetag").map(|b| TagRef::from_bytes(b))
186+
let hash_kind = self.hash_kind;
187+
self.find_all("mergetag").map(move |b| TagRef::from_bytes(b, hash_kind))
179188
}
180189

181190
/// Return the cryptographic signature provided by gpg/pgp verbatim.

gix-object/src/commit/ref_iter.rs

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,29 +30,35 @@ pub(crate) enum State {
3030

3131
/// Lifecycle
3232
impl<'a> CommitRefIter<'a> {
33-
/// Create a commit iterator from data.
34-
pub fn from_bytes(data: &'a [u8]) -> CommitRefIter<'a> {
33+
/// Create a commit iterator from the given `data`, using `hash_kind` to know
34+
/// what kind of hash to expect for validation.
35+
pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> CommitRefIter<'a> {
3536
CommitRefIter {
3637
data,
3738
state: State::default(),
39+
hash_kind,
3840
}
3941
}
4042
}
4143

4244
/// Access
4345
impl<'a> CommitRefIter<'a> {
4446
/// Parse `data` as commit and return its PGP signature, along with *all non-signature* data as [`SignedData`], or `None`
45-
/// if the commit isn't signed.
47+
/// if the commit isn't signed. All hashes in `data` are parsed as `hash_kind`.
4648
///
4749
/// This allows the caller to validate the signature by passing the signed data along with the signature back to the program
4850
/// that created it.
49-
pub fn signature(data: &'a [u8]) -> Result<Option<(Cow<'a, BStr>, SignedData<'a>)>, crate::decode::Error> {
51+
pub fn signature(
52+
data: &'a [u8],
53+
hash_kind: gix_hash::Kind,
54+
) -> Result<Option<(Cow<'a, BStr>, SignedData<'a>)>, crate::decode::Error> {
5055
let mut signature_and_range = None;
5156

5257
let raw_tokens = CommitRefIterRaw {
5358
data,
5459
state: State::default(),
5560
offset: 0,
61+
hash_kind,
5662
};
5763
for token in raw_tokens {
5864
let token = token?;
@@ -146,35 +152,43 @@ fn missing_field() -> crate::decode::Error {
146152

147153
impl<'a> CommitRefIter<'a> {
148154
#[inline]
149-
fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> {
155+
fn next_inner(
156+
mut i: &'a [u8],
157+
state: &mut State,
158+
hash_kind: gix_hash::Kind,
159+
) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> {
150160
let input = &mut i;
151-
match Self::next_inner_(input, state) {
161+
match Self::next_inner_(input, state, hash_kind) {
152162
Ok(token) => Ok((*input, token)),
153163
Err(err) => Err(err),
154164
}
155165
}
156166

157-
fn next_inner_(input: &mut &'a [u8], state: &mut State) -> Result<Token<'a>, crate::decode::Error> {
167+
fn next_inner_(
168+
input: &mut &'a [u8],
169+
state: &mut State,
170+
hash_kind: gix_hash::Kind,
171+
) -> Result<Token<'a>, crate::decode::Error> {
158172
use State::*;
159173
Ok(match state {
160174
Tree => {
161-
let tree = parse::header_field(input, b"tree", parse::hex_hash)?;
175+
let tree = parse::header_field(input, b"tree", |value| parse::hex_hash(value, hash_kind))?;
162176
*state = State::Parents;
163177
Token::Tree {
164178
id: ObjectId::from_hex(tree).expect("parsing validation"),
165179
}
166180
}
167181
Parents => {
168182
if input.starts_with(b"parent ") {
169-
let parent = parse::header_field(input, b"parent", parse::hex_hash)?;
183+
let parent = parse::header_field(input, b"parent", |value| parse::hex_hash(value, hash_kind))?;
170184
Token::Parent {
171185
id: ObjectId::from_hex(parent).expect("parsing validation"),
172186
}
173187
} else {
174188
*state = State::Signature {
175189
of: SignatureKind::Author,
176190
};
177-
Self::next_inner_(input, state)?
191+
Self::next_inner_(input, state, hash_kind)?
178192
}
179193
}
180194
Signature { ref mut of } => {
@@ -201,13 +215,13 @@ impl<'a> CommitRefIter<'a> {
201215
let encoding = parse::header_field(input, b"encoding", Ok)?;
202216
Token::Encoding(encoding.as_bstr())
203217
} else {
204-
Self::next_inner_(input, state)?
218+
Self::next_inner_(input, state, hash_kind)?
205219
}
206220
}
207221
ExtraHeaders => {
208222
if input.starts_with(b"\n") {
209223
*state = State::Message;
210-
Self::next_inner_(input, state)?
224+
Self::next_inner_(input, state, hash_kind)?
211225
} else {
212226
let before = *input;
213227
match parse::any_header_field_multi_line(input)
@@ -240,7 +254,7 @@ impl<'a> Iterator for CommitRefIter<'a> {
240254
if self.data.is_empty() {
241255
return None;
242256
}
243-
match Self::next_inner(self.data, &mut self.state) {
257+
match Self::next_inner(self.data, &mut self.state, self.hash_kind) {
244258
Ok((data, token)) => {
245259
self.data = data;
246260
Some(Ok(token))
@@ -258,6 +272,7 @@ struct CommitRefIterRaw<'a> {
258272
data: &'a [u8],
259273
state: State,
260274
offset: usize,
275+
hash_kind: gix_hash::Kind,
261276
}
262277

263278
impl<'a> Iterator for CommitRefIterRaw<'a> {
@@ -267,7 +282,7 @@ impl<'a> Iterator for CommitRefIterRaw<'a> {
267282
if self.data.is_empty() {
268283
return None;
269284
}
270-
match CommitRefIter::next_inner(self.data, &mut self.state) {
285+
match CommitRefIter::next_inner(self.data, &mut self.state, self.hash_kind) {
271286
Ok((remaining, token)) => {
272287
let consumed = self.data.len() - remaining.len();
273288
let start = self.offset;

gix-object/src/data.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ impl<'a> Data<'a> {
1616
Ok(match self.kind {
1717
Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data, self.hash_kind)?),
1818
Kind::Blob => ObjectRef::Blob(BlobRef { data: self.data }),
19-
Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data)?),
20-
Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data)?),
19+
Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data, self.hash_kind)?),
20+
Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data, self.hash_kind)?),
2121
})
2222
}
2323

@@ -34,7 +34,7 @@ impl<'a> Data<'a> {
3434
/// `None` if this is not a commit object.
3535
pub fn try_into_commit_iter(self) -> Option<CommitRefIter<'a>> {
3636
match self.kind {
37-
Kind::Commit => Some(CommitRefIter::from_bytes(self.data)),
37+
Kind::Commit => Some(CommitRefIter::from_bytes(self.data, self.hash_kind)),
3838
_ => None,
3939
}
4040
}
@@ -43,7 +43,7 @@ impl<'a> Data<'a> {
4343
/// `None` if this is not a tag object.
4444
pub fn try_into_tag_iter(self) -> Option<TagRefIter<'a>> {
4545
match self.kind {
46-
Kind::Tag => Some(TagRefIter::from_bytes(self.data)),
46+
Kind::Tag => Some(TagRefIter::from_bytes(self.data, self.hash_kind)),
4747
_ => None,
4848
}
4949
}

gix-object/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ pub struct Blob {
109109
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
110110
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
111111
pub struct CommitRef<'a> {
112-
/// HEX hash of tree object we point to. Usually 40 bytes long.
112+
/// HEX hash of tree object we point to.
113113
///
114114
/// Use [`tree()`](CommitRef::tree()) to obtain a decoded version of it.
115115
#[cfg_attr(feature = "serde", serde(borrow))]
@@ -140,6 +140,7 @@ pub struct CommitRef<'a> {
140140
pub struct CommitRefIter<'a> {
141141
data: &'a [u8],
142142
state: commit::ref_iter::State,
143+
hash_kind: gix_hash::Kind,
143144
}
144145

145146
/// A mutable git commit, representing an annotated state of a working tree along with a reference to its historical commits.
@@ -194,6 +195,7 @@ pub struct TagRef<'a> {
194195
pub struct TagRefIter<'a> {
195196
data: &'a [u8],
196197
state: tag::ref_iter::State,
198+
hash_kind: gix_hash::Kind,
197199
}
198200

199201
/// A mutable git tag.

0 commit comments

Comments
 (0)