diff --git a/Cargo.lock b/Cargo.lock index f4df53efe51..49e54c58380 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1497,7 +1497,6 @@ dependencies = [ "gix-testtools", "pretty_assertions", "serde", - "winnow", ] [[package]] @@ -1623,12 +1622,10 @@ dependencies = [ "gix-path", "gix-ref", "gix-sec", - "memchr", "serde", "smallvec", "thiserror 2.0.18", "unicode-bom", - "winnow", ] [[package]] @@ -1928,7 +1925,6 @@ dependencies = [ "gix-imara-diff", "gix-object", "hashbrown 0.16.1", - "memchr", ] [[package]] @@ -2085,7 +2081,6 @@ dependencies = [ "smallvec", "termtree", "thiserror 2.0.18", - "winnow", ] [[package]] @@ -2260,7 +2255,6 @@ dependencies = [ "nonempty", "serde", "thiserror 2.0.18", - "winnow", ] [[package]] @@ -2296,7 +2290,6 @@ dependencies = [ "memmap2", "serde", "thiserror 2.0.18", - "winnow", ] [[package]] @@ -2497,7 +2490,6 @@ dependencies = [ "serial_test", "tar", "tempfile", - "winnow", "xz2", ] @@ -6106,9 +6098,6 @@ name = "winnow" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" -dependencies = [ - "memchr", -] [[package]] name = "winreg" diff --git a/gitoxide-core/src/hours/mod.rs b/gitoxide-core/src/hours/mod.rs index 40fd0f2955a..d07e5c6e367 100644 --- a/gitoxide-core/src/hours/mod.rs +++ b/gitoxide-core/src/hours/mod.rs @@ -71,10 +71,10 @@ impl ParsedIdentity<'_> { fn parse_trailer_identity(trailer: gix::objs::commit::message::body::TrailerRef<'_>) -> Option> { match trailer.value { - std::borrow::Cow::Borrowed(value) => IdentityRef::from_bytes::(value.as_ref()) + std::borrow::Cow::Borrowed(value) => IdentityRef::from_bytes(value.as_ref()) .ok() .map(|identity| ParsedIdentity::Borrowed(identity.trim())), - std::borrow::Cow::Owned(value) => IdentityRef::from_bytes::(value.as_ref()) + std::borrow::Cow::Owned(value) => IdentityRef::from_bytes(value.as_ref()) .ok() .map(|identity| ParsedIdentity::Owned(identity.trim().to_owned())), } @@ 
-83,8 +83,9 @@ fn parse_trailer_identity(trailer: gix::objs::commit::message::body::TrailerRef< /// Return `(commit_author, [commit_author, co_authors...])`. Use the `commit_author` for easy access to the commit author itself. fn commit_author_identities( commit_data: &[u8], + hash_kind: gix::hash::Kind, ) -> Result<(gix::actor::SignatureRef<'_>, SmallVec<[ParsedIdentity<'_>; 2]>), gix::objs::decode::Error> { - let commit = gix::objs::CommitRef::from_bytes(commit_data)?; + let commit = gix::objs::CommitRef::from_bytes(commit_data, hash_kind)?; let author = commit.author()?.trim(); let mut authors = smallvec![ParsedIdentity::Borrowed(gix::actor::IdentityRef::from(author))]; authors.extend(commit.co_authored_by_trailers().filter_map(parse_trailer_identity)); @@ -130,7 +131,7 @@ where let extract_signatures = scope.spawn(move || -> anyhow::Result> { let mut out = Vec::new(); for (commit_idx, commit_data) in rx { - if let Ok((commit_author, authors)) = commit_author_identities(&commit_data) { + if let Ok((commit_author, authors)) = commit_author_identities(&commit_data, commit_id.kind()) { let mut string_ref = |s: &[u8]| -> &'static BStr { match string_heap.get(s) { Some(n) => n.as_bstr(), @@ -445,7 +446,7 @@ body\n\ \n\ Co-authored-by: Second Author \n\ Co-authored-by: Third Author \n"; - let (author, authors) = commit_author_identities(commit).expect("valid commit"); + let (author, authors) = commit_author_identities(commit, gix::hash::Kind::Sha1).expect("valid commit"); assert_eq!(author.time, "1710000000 +0000"); assert_eq!( authors @@ -478,7 +479,7 @@ committer Main Author 1710000000 +0000\n\ subject\n\ \n\ Co-authored-by: not a signature\n"; - let (_, authors) = commit_author_identities(commit).expect("valid commit"); + let (_, authors) = commit_author_identities(commit, gix::hash::Kind::Sha1).expect("valid commit"); assert_eq!(authors.len(), 1); assert_eq!(authors[0].name(), "Main Author".as_bytes().as_bstr()); assert_eq!(authors[0].email(), 
"main@example.com".as_bytes().as_bstr()); diff --git a/gitoxide-core/src/query/engine/update.rs b/gitoxide-core/src/query/engine/update.rs index 818dfff3ec7..f9d76d8633c 100644 --- a/gitoxide-core/src/query/engine/update.rs +++ b/gitoxide-core/src/query/engine/update.rs @@ -395,7 +395,7 @@ pub fn update( self.progress.inc(); if self.known_commits.binary_search(&id.to_owned()).is_err() { let res = { - let mut parents = gix::objs::CommitRefIter::from_bytes(obj.data).parent_ids(); + let mut parents = gix::objs::CommitRefIter::from_bytes(obj.data, obj.hash_kind).parent_ids(); let res = parents.next().map(|first_parent| (Some(first_parent), id.to_owned())); match parents.next() { Some(_) => None, diff --git a/gitoxide-core/src/repository/mailmap.rs b/gitoxide-core/src/repository/mailmap.rs index 9252bdafde0..b8d20107db1 100644 --- a/gitoxide-core/src/repository/mailmap.rs +++ b/gitoxide-core/src/repository/mailmap.rs @@ -71,7 +71,7 @@ pub fn check( let mut buf = Vec::new(); for contact in contacts { - let actor = match gix::actor::IdentityRef::from_bytes::<()>(&contact) { + let actor = match gix::actor::IdentityRef::from_bytes(&contact) { Ok(a) => a, Err(_) => { let Some(email) = contact diff --git a/gix-actor/Cargo.toml b/gix-actor/Cargo.toml index e1c2fc2504f..d8015c26d88 100644 --- a/gix-actor/Cargo.toml +++ b/gix-actor/Cargo.toml @@ -26,7 +26,6 @@ bstr = { version = "1.12.0", default-features = false, features = [ "std", "unicode", ] } -winnow = { version = "1.0.0", features = ["simd"] } serde = { version = "1.0.114", optional = true, default-features = false, features = [ "derive", ] } diff --git a/gix-actor/fuzz/fuzz_targets/actors.rs b/gix-actor/fuzz/fuzz_targets/actors.rs index b0773d21c0a..33d9a132491 100644 --- a/gix-actor/fuzz/fuzz_targets/actors.rs +++ b/gix-actor/fuzz/fuzz_targets/actors.rs @@ -1,6 +1,6 @@ #![no_main] -use gix_actor::{IdentityRef, Signature, SignatureRef}; +use gix_actor::{IdentityRef, SignatureRef}; use libfuzzer_sys::fuzz_target; use 
std::hint::black_box; @@ -24,10 +24,10 @@ fn inspect_signature(signature: SignatureRef<'_>) { } fuzz_target!(|input: &[u8]| { - if let Ok(identity) = IdentityRef::from_bytes::<()>(input) { + if let Ok(identity) = IdentityRef::from_bytes(input) { inspect_identity(identity); } - if let Ok(signature) = SignatureRef::from_bytes::<()>(input) { + if let Ok(signature) = SignatureRef::from_bytes(input) { inspect_signature(signature); } }); diff --git a/gix-actor/src/identity.rs b/gix-actor/src/identity.rs index f6fbac1d4a7..e900ec3ea5c 100644 --- a/gix-actor/src/identity.rs +++ b/gix-actor/src/identity.rs @@ -1,15 +1,21 @@ use bstr::ByteSlice; -use winnow::{error::StrContext, prelude::*}; use crate::{signature::decode, Identity, IdentityRef}; impl<'a> IdentityRef<'a> { /// Deserialize an identity from the given `data`. - pub fn from_bytes(mut data: &'a [u8]) -> Result> - where - E: winnow::error::ParserError<&'a [u8]> + winnow::error::AddContext<&'a [u8], StrContext>, - { - decode::identity.parse_next(&mut data) + /// + /// Typical input is `Name 1700000000 +0000`. + pub fn from_bytes(mut data: &'a [u8]) -> Result { + Self::from_bytes_consuming(&mut data) + } + + /// Deserialize an identity from the given `data` and advance it past the identity. + /// + /// Typical input is `Name 1700000000 +0000`; on success, + /// `data` points to the bytes immediately after the closing `>`. + pub fn from_bytes_consuming(data: &mut &'a [u8]) -> Result { + decode::identity(data) } /// Create an owned instance from this shared one. diff --git a/gix-actor/src/lib.rs b/gix-actor/src/lib.rs index 92db2ee6cb2..f65cd0ef3f3 100644 --- a/gix-actor/src/lib.rs +++ b/gix-actor/src/lib.rs @@ -5,13 +5,13 @@ //! ``` //! use gix_actor::{IdentityRef, SignatureRef}; //! -//! let actor = IdentityRef::from_bytes::<()>(b" Taylor Example < taylor@example.com >") +//! let actor = IdentityRef::from_bytes(b" Taylor Example < taylor@example.com >") //! .unwrap() //! .trim(); //! 
assert_eq!(actor.name, "Taylor Example"); //! assert_eq!(actor.email, "taylor@example.com"); //! -//! let signature = SignatureRef::from_bytes::<()>(b"Taylor Example 1711398853 +0800") +//! let signature = SignatureRef::from_bytes(b"Taylor Example 1711398853 +0800") //! .unwrap() //! .trim(); //! assert_eq!(signature.actor(), actor); diff --git a/gix-actor/src/signature/decode.rs b/gix-actor/src/signature/decode.rs index 87f5680c3ba..271503c3e7a 100644 --- a/gix-actor/src/signature/decode.rs +++ b/gix-actor/src/signature/decode.rs @@ -1,98 +1,68 @@ pub(crate) mod function { use bstr::ByteSlice; - use winnow::{ - combinator::{opt, separated_pair}, - error::{AddContext, ErrMode, ParserError, StrContext}, - prelude::*, - stream::{AsChar, Stream}, - token::take_while, - }; + use gix_error::ValidationError; use crate::{IdentityRef, SignatureRef}; - /// Parse a signature from the bytes input `i` using `nom`. - pub fn decode<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - i: &mut &'a [u8], - ) -> ModalResult, E> { - separated_pair( - identity, - opt(b" "), - opt(( - take_while(0.., |b: u8| b == b'+' || b == b'-' || b.is_space() || b.is_dec_digit()).map(|v: &[u8]| v), - )) - .map(|maybe_bytes| { - if let Some((bytes,)) = maybe_bytes { - // SAFETY: The parser validated that there are only ASCII characters. - #[allow(unsafe_code)] - unsafe { - std::str::from_utf8_unchecked(bytes) - } - } else { - "" - } - }), - ) - .context(StrContext::Expected(" <> <+|->".into())) - .map(|(identity, time)| SignatureRef { + /// Parse a signature from the bytes input `i`, and change it to point to the unparsed bytes afterwards. 
+ pub fn decode<'a>(i: &mut &'a [u8]) -> Result, ValidationError> { + let identity = identity(i)?; + if i.first() == Some(&b' ') { + *i = &i[1..]; + } + + let time_len = i.iter().position(|b| !is_time_byte(*b)).unwrap_or(i.len()); + let (time, rest) = i.split_at(time_len); + *i = rest; + // SAFETY: The parser validated that there are only ASCII characters with `is_time_byte()`. + #[allow(unsafe_code)] + let time = unsafe { std::str::from_utf8_unchecked(time) }; + + Ok(SignatureRef { name: identity.name, email: identity.email, time, }) - .parse_next(i) } - /// Parse an identity from the bytes input `i` (like `name `) using `nom`. - pub fn identity<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - i: &mut &'a [u8], - ) -> ModalResult, E> { - let start = i.checkpoint(); + /// Parse an identity from the bytes input `i` (like `name `). + pub fn identity<'a>(i: &mut &'a [u8]) -> Result, ValidationError> { let eol_idx = i.find_byte(b'\n').unwrap_or(i.len()); let right_delim_idx = i[..eol_idx] .rfind_byte(b'>') - .ok_or(ErrMode::Cut(E::from_input(i).add_context( - i, - &start, - StrContext::Label("Closing '>' not found"), - )))?; + .ok_or_else(|| ValidationError::new("Closing '>' not found"))?; let i_name_and_email = &i[..right_delim_idx]; let skip_from_right = i_name_and_email.iter().rev().take_while(|b| **b == b'>').count(); let left_delim_idx = i_name_and_email .find_byte(b'<') - .ok_or(ErrMode::Cut(E::from_input(i).add_context( - &i_name_and_email, - &start, - StrContext::Label("Opening '<' not found"), - )))?; + .ok_or_else(|| ValidationError::new("Opening '<' not found"))?; let skip_from_left = i[left_delim_idx..].iter().take_while(|b| **b == b'<').count(); let mut name = i[..left_delim_idx].as_bstr(); name = name.strip_suffix(b" ").unwrap_or(name).as_bstr(); let email = i .get(left_delim_idx + skip_from_left..right_delim_idx - skip_from_right) - .ok_or(ErrMode::Cut(E::from_input(i).add_context( - &i_name_and_email, - &start, - 
StrContext::Label("Skipped parts run into each other"), - )))? + .ok_or_else(|| ValidationError::new("Skipped parts run into each other"))? .as_bstr(); *i = i.get(right_delim_idx + 1..).unwrap_or(&[]); Ok(IdentityRef { name, email }) } + + fn is_time_byte(b: u8) -> bool { + matches!(b, b'+' | b'-' | b'0'..=b'9' | b' ' | b'\t') + } } pub use function::identity; #[cfg(test)] mod tests { mod parse_signature { - use gix_testtools::to_bstr_err; - use winnow::prelude::*; + use gix_error::ValidationError; - use crate::{signature, SignatureRef}; + use crate::SignatureRef; - fn decode<'i>( - i: &mut &'i [u8], - ) -> ModalResult, winnow::error::TreeError<&'i [u8], winnow::error::StrContext>> { - signature::decode.parse_next(i) + fn decode(mut i: &[u8]) -> Result<(&[u8], SignatureRef<'_>), ValidationError> { + SignatureRef::from_bytes_consuming(&mut i).map(|signature| (i, signature)) } fn signature(name: &'static str, email: &'static str, time: &'static str) -> SignatureRef<'static> { @@ -105,8 +75,7 @@ mod tests { #[test] fn tz_minus() { - let actual = decode - .parse_peek(b"Sebastian Thiel 1528473343 -0230") + let actual = decode(b"Sebastian Thiel 1528473343 -0230") .expect("parse to work") .1; assert_eq!( @@ -126,8 +95,7 @@ mod tests { #[test] fn tz_plus() { assert_eq!( - decode - .parse_peek(b"Sebastian Thiel 1528473343 +0230") + decode(b"Sebastian Thiel 1528473343 +0230") .expect("parse to work") .1, signature("Sebastian Thiel", "byronimo@gmail.com", "1528473343 +0230") @@ -137,8 +105,7 @@ mod tests { #[test] fn email_with_space() { assert_eq!( - decode - .parse_peek(b"Sebastian Thiel <\tbyronimo@gmail.com > 1528473343 +0230") + decode(b"Sebastian Thiel <\tbyronimo@gmail.com > 1528473343 +0230") .expect("parse to work") .1, signature("Sebastian Thiel", "\tbyronimo@gmail.com ", "1528473343 +0230") @@ -148,8 +115,7 @@ mod tests { #[test] fn negative_offset_0000() { assert_eq!( - decode - .parse_peek(b"Sebastian Thiel 1528473343 -0000") + decode(b"Sebastian Thiel 1528473343 
-0000") .expect("parse to work") .1, signature("Sebastian Thiel", "byronimo@gmail.com", "1528473343 -0000") @@ -159,8 +125,7 @@ mod tests { #[test] fn negative_offset_double_dash() { assert_eq!( - decode - .parse_peek(b"name 1288373970 --700") + decode(b"name 1288373970 --700") .expect("parse to work") .1, signature("name", "name@example.com", "1288373970 --700") @@ -170,7 +135,7 @@ mod tests { #[test] fn empty_name_and_email() { assert_eq!( - decode.parse_peek(b" <> 12345 -1215").expect("parse to work").1, + decode(b" <> 12345 -1215").expect("parse to work").1, signature("", "", "12345 -1215") ); } @@ -178,18 +143,17 @@ mod tests { #[test] fn invalid_signature() { assert_eq!( - decode.parse_peek(b"hello < 12345 -1215") - .map_err(to_bstr_err) - .expect_err("parse fails as > is missing") - .to_string(), - " at 'hello < 12345 -1215'\n 0: invalid Closing '>' not found at 'hello < 12345 -1215'\n 1: expected ` <> <+|->` at 'hello < 12345 -1215'\n" - ); + decode(b"hello < 12345 -1215") + .expect_err("parse fails as > is missing") + .to_string(), + "Closing '>' not found" + ); } #[test] fn invalid_time() { assert_eq!( - decode.parse_peek(b"hello <> abc -1215").expect("parse to work").1, + decode(b"hello <> abc -1215").expect("parse to work").1, signature("hello", "", "") ); } diff --git a/gix-actor/src/signature/mod.rs b/gix-actor/src/signature/mod.rs index 472f7aca45c..2e675dbc773 100644 --- a/gix-actor/src/signature/mod.rs +++ b/gix-actor/src/signature/mod.rs @@ -1,17 +1,24 @@ mod _ref { use bstr::ByteSlice; - use winnow::{error::StrContext, prelude::*}; use crate::{signature::decode, IdentityRef, Signature, SignatureRef}; /// Lifecycle impl<'a> SignatureRef<'a> { /// Deserialize a signature from the given `data`. 
- pub fn from_bytes(mut data: &'a [u8]) -> Result, winnow::error::ErrMode> - where - E: winnow::error::ParserError<&'a [u8]> + winnow::error::AddContext<&'a [u8], StrContext>, - { - decode.parse_next(&mut data) + /// + /// Typical input is `Name 1700000000 +0000`. + pub fn from_bytes(mut data: &'a [u8]) -> Result, gix_error::ValidationError> { + Self::from_bytes_consuming(&mut data) + } + + /// Deserialize a signature from the given `data` and advance it past the signature. + /// + /// Typical input is `Name 1700000000 +0000`; on + /// success, `data` points to the bytes immediately after the parsed + /// signature. + pub fn from_bytes_consuming(data: &mut &'a [u8]) -> Result, gix_error::ValidationError> { + decode(data) } /// Try to parse the timestamp and create an owned instance from this shared one. diff --git a/gix-actor/tests/actor/identity.rs b/gix-actor/tests/actor/identity.rs index e68dbcea4cd..7f600c01949 100644 --- a/gix-actor/tests/actor/identity.rs +++ b/gix-actor/tests/actor/identity.rs @@ -12,7 +12,7 @@ fn round_trip() -> gix_testtools::Result { b".. 
whitespace \t is explicitly allowed - unicode aware trimming must be done elsewhere " ]; for input in DEFAULTS { - let signature: Identity = gix_actor::IdentityRef::from_bytes::<()>(input).unwrap().into(); + let signature: Identity = gix_actor::IdentityRef::from_bytes(input).unwrap().into(); let mut output = Vec::new(); signature.write_to(&mut output)?; assert_eq!(output.as_bstr(), input.as_bstr()); @@ -32,7 +32,7 @@ fn lenient_parsing() -> gix_testtools::Result { "fl (input.as_bytes()).unwrap(); + let identity = gix_actor::IdentityRef::from_bytes(input.as_bytes()).unwrap(); assert_eq!(identity.name, "First Last"); assert_eq!( identity.email, expected_email, diff --git a/gix-actor/tests/actor/signature.rs b/gix-actor/tests/actor/signature.rs index c88a07f214d..c5191b2a5a2 100644 --- a/gix-actor/tests/actor/signature.rs +++ b/gix-actor/tests/actor/signature.rs @@ -49,7 +49,7 @@ use gix_actor::{Signature, SignatureRef}; #[test] fn trim() { - let sig = gix_actor::SignatureRef::from_bytes::<()>(b" \t hello there \t < \t email \t > 1 -0030").unwrap(); + let sig = gix_actor::SignatureRef::from_bytes(b" \t hello there \t < \t email \t > 1 -0030").unwrap(); let sig = sig.trim(); assert_eq!(sig.name, "hello there"); assert_eq!(sig.email, "email"); @@ -65,7 +65,7 @@ fn round_trip() -> Result<(), Box> { ]; for input in DEFAULTS { - let signature: Signature = gix_actor::SignatureRef::from_bytes::<()>(input).unwrap().into(); + let signature: Signature = gix_actor::SignatureRef::from_bytes(input).unwrap().into(); let mut output = Vec::new(); signature.write_to(&mut output)?; assert_eq!(output.as_bstr(), input.as_bstr()); @@ -76,7 +76,7 @@ fn round_trip() -> Result<(), Box> { #[test] fn signature_ref_round_trips_with_seconds_in_offset() -> Result<(), Box> { let input = b"Sebastian Thiel 1313584730 +051800"; // Seen in the wild - let signature: SignatureRef = gix_actor::SignatureRef::from_bytes::<()>(input).unwrap(); + let signature: SignatureRef = 
gix_actor::SignatureRef::from_bytes(input).unwrap(); let mut output = Vec::new(); signature.write_to(&mut output)?; assert_eq!(output.as_bstr(), input.as_bstr()); @@ -85,7 +85,7 @@ fn signature_ref_round_trips_with_seconds_in_offset() -> Result<(), Box(b"first last 1312735823 +051800") + let signature = gix_actor::SignatureRef::from_bytes(b"first last 1312735823 +051800") .expect("deal with trailing zeroes in timestamp by discarding it"); assert_eq!( signature, @@ -96,7 +96,7 @@ fn parse_timestamp_with_trailing_digits() { } ); - let signature = gix_actor::SignatureRef::from_bytes::<()>(b"first last 1312735823 +0518") + let signature = gix_actor::SignatureRef::from_bytes(b"first last 1312735823 +0518") .expect("this naturally works as the timestamp does not have trailing zeroes"); assert_eq!( signature, @@ -110,7 +110,7 @@ fn parse_timestamp_with_trailing_digits() { #[test] fn parse_missing_timestamp() { - let signature = gix_actor::SignatureRef::from_bytes::<()>(b"first last ") + let signature = gix_actor::SignatureRef::from_bytes(b"first last ") .expect("deal with missing timestamp in signature by zeroing it"); assert_eq!( signature, diff --git a/gix-config/Cargo.toml b/gix-config/Cargo.toml index 4200bb0bead..0d946e57834 100644 --- a/gix-config/Cargo.toml +++ b/gix-config/Cargo.toml @@ -28,8 +28,6 @@ gix-sec = { version = "^0.13.3", path = "../gix-sec" } gix-ref = { version = "^0.62.0", path = "../gix-ref" } gix-glob = { version = "^0.25.0", path = "../gix-glob" } -winnow = { version = "1.0.0", features = ["simd"] } -memchr = "2" thiserror = "2.0.18" unicode-bom = { version = "2.0.3" } bstr = { version = "1.12.0", default-features = false, features = ["std"] } diff --git a/gix-config/src/file/mutable/mod.rs b/gix-config/src/file/mutable/mod.rs index 0ff284cb2f8..29357bfc7f2 100644 --- a/gix-config/src/file/mutable/mod.rs +++ b/gix-config/src/file/mutable/mod.rs @@ -8,7 +8,7 @@ pub(crate) mod multi_value; pub(crate) mod section; pub(crate) mod value; -fn 
escape_value(value: &BStr) -> BString { +pub(crate) fn escape_value(value: &BStr) -> BString { let starts_with_whitespace = value.first().is_some_and(u8::is_ascii_whitespace); let ends_with_whitespace = value .get(value.len().saturating_sub(1)) diff --git a/gix-config/src/lib.rs b/gix-config/src/lib.rs index ff1b7a0a76e..4301fdb0899 100644 --- a/gix-config/src/lib.rs +++ b/gix-config/src/lib.rs @@ -54,7 +54,7 @@ doc = ::document_features::document_features!() )] #![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))] -#![deny(missing_docs, rust_2018_idioms, unsafe_code)] +#![deny(missing_docs, unsafe_code)] pub mod file; diff --git a/gix-config/src/parse/events.rs b/gix-config/src/parse/events.rs index ae95b61c64c..1353987b759 100644 --- a/gix-config/src/parse/events.rs +++ b/gix-config/src/parse/events.rs @@ -35,8 +35,8 @@ pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>; /// - Comment markers are not strictly defined either. This parser will always /// and only handle a semicolon or octothorpe (also known as a hash or number /// sign). -/// - Global properties may be allowed in `.ini` parsers, but is strictly -/// disallowed by this parser. +/// - Global properties before the first section are accepted for compatibility +/// with Git, even though they are uncommon in `.gitconfig` files. /// - Only `\t`, `\n`, `\b` `\\` are valid escape characters. /// - Quoted and semi-quoted values will be parsed (but quotes will be included /// in event outputs). 
An example of a semi-quoted value is `5"hello world"`, diff --git a/gix-config/src/parse/from_bytes/mod.rs b/gix-config/src/parse/from_bytes/mod.rs new file mode 100644 index 00000000000..55b2ed6bd43 --- /dev/null +++ b/gix-config/src/parse/from_bytes/mod.rs @@ -0,0 +1,427 @@ +use std::borrow::Cow; + +use bstr::{BStr, ByteSlice}; + +use crate::parse::{error::ParseNode, section, Comment, Error, Event}; + +type ParseResult = Result; + +/// Attempt to zero-copy parse the provided `input`, passing results to `dispatch`. +/// +/// The `input` is a complete Git config file. A UTF BOM is skipped, leading +/// comments, whitespace, newlines, and Git-compatible key/value pairs before +/// the first section are emitted first via `dispatch`, and then one or more +/// sections are parsed until EOF. +/// +/// On success, all input is consumed. +/// On failure, the returned [`Error`] reports the line number, the parser node +/// that was active, and the remaining bytes at the point where parsing stopped. 
+pub fn from_bytes<'i>(mut input: &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> Result<(), Error> { + let original = input; + + let bom = unicode_bom::Bom::from(input); + input = &input[bom.len()..]; + + loop { + let before = input; + if let Ok(comment) = comment(&mut input) { + dispatch(Event::Comment(comment)); + } else if let Ok(whitespace) = take_spaces1(&mut input) { + dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } else if let Ok(newline) = take_newlines1(&mut input) { + dispatch(Event::Newline(Cow::Borrowed(newline))); + } else if !input.starts_with(b"[") { + let mut node = ParseNode::SectionHeader; + key_value_pair(&mut input, &mut node, dispatch).map_err(|_| Error { + line_number: newlines_from(original, input), + last_attempted_parser: node, + parsed_until: input.as_bstr().into(), + })?; + } + if input.len() == before.len() { + break; + } + } + + if input.is_empty() { + return Ok(()); + } + + let mut node = ParseNode::SectionHeader; + while !input.is_empty() { + section(&mut input, &mut node, dispatch).map_err(|_| Error { + line_number: newlines_from(original, input), + last_attempted_parser: node, + parsed_until: input.as_bstr().into(), + })?; + } + Ok(()) +} + +/// Count newline bytes in `original` up to the beginning of `input`. +/// +/// `rest` is expected to be a suffix of `original`, i.e. the unconsumed input, +/// and the returned count is used as the zero-based parse error line offset. +fn newlines_from(original: &[u8], rest: &[u8]) -> usize { + let consumed = original.len().saturating_sub(rest.len()); + original[..consumed].iter().filter(|c| **c == b'\n').count() +} + +/// Parse a single Git config comment. +/// +/// A comment starts with `;` or `#` and continues until, but not including, the +/// next `\n` or EOF. On success, `i` is advanced to the newline or empty suffix +/// and the returned [`Comment`] borrows the marker and text from the input. 
+fn comment<'i>(i: &mut &'i [u8]) -> ParseResult> { + let Some((&tag, rest)) = i.split_first() else { + return Err(()); + }; + if tag != b';' && tag != b'#' { + return Err(()); + } + let end = rest.find_byte(b'\n').unwrap_or(rest.len()); + let text = rest[..end].as_bstr(); + *i = &rest[end..]; + Ok(Comment { + tag, + text: Cow::Borrowed(text), + }) +} + +/// Parse a section header and all following items until the next section or EOF. +/// +/// A section starts with a [`section_header`]. The body may contain whitespace, +/// newlines, key/value pairs, and comments in sequence. Parsed items are emitted +/// to `dispatch`, `node` is updated before parsing key names and values for +/// error reporting, and `i` is advanced past all consumed section content. +fn section<'i>(i: &mut &'i [u8], node: &mut ParseNode, dispatch: &mut dyn FnMut(Event<'i>)) -> ParseResult<()> { + let header = section_header(i)?; + dispatch(Event::SectionHeader(header)); + + loop { + let before = *i; + + if let Ok(v) = take_spaces1(i) { + dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr()))); + } + if let Ok(v) = take_newlines1(i) { + dispatch(Event::Newline(Cow::Borrowed(v.as_bstr()))); + } + + key_value_pair(i, node, dispatch)?; + + if let Ok(comment) = comment(i) { + dispatch(Event::Comment(comment)); + } + + if i.len() == before.len() { + break; + } + } + + Ok(()) +} + +/// Parse a Git config section header. +/// +/// Accepted forms are `[name]`, `[name.subsection]`, and the legacy +/// `[name "subsection"]` form. Section names contain ASCII alphanumeric bytes, +/// `-`, and `.`, and may be empty only for compatibility with Git's quoted +/// subsection form. Quoted subsection names are parsed by [`sub_section`]. On +/// success, `i` is advanced past the closing `]`. 
+fn section_header<'i>(i: &mut &'i [u8]) -> ParseResult<section::Header<'i>> { + let mut c = *i; + c = c.strip_prefix(b"[").ok_or(())?; + let name = { + let rest = c; + let name_len = rest.iter().take_while(|b| is_section_char(**b)).count(); + c = &rest[name_len..]; + rest[..name_len].as_bstr() + }; + + if let Some(rest) = c.strip_prefix(b"]") { + if name.is_empty() { + return Err(()); + } + *i = rest; + return match name.find_byte(b'.') { + Some(index) => Ok(section::Header { + name: section::Name(Cow::Borrowed(name[..index].as_bstr())), + separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())), + subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())), + }), + None => Ok(section::Header { + name: section::Name(Cow::Borrowed(name.as_bstr())), + separator: None, + subsection_name: None, + }), + }; + } + + let whitespace = take_spaces1(&mut c)?; + let Some(rest) = c.strip_prefix(b"\"") else { + return Err(()); + }; + c = rest; + let subsection_name = quoted_sub_section(&mut c)?; + let Some(rest) = c.strip_prefix(b"\"]") else { + return Err(()); + }; + *i = rest; + Ok(section::Header { + name: section::Name(Cow::Borrowed(name)), + separator: Some(Cow::Borrowed(whitespace)), + subsection_name: Some(subsection_name), + }) +} + +/// Return true if `c` is accepted in an unquoted section header name. +/// +/// Valid bytes are ASCII alphanumeric characters, `-`, and `.`. +fn is_section_char(c: u8) -> bool { + c.is_ascii_alphanumeric() || c == b'-' || c == b'.' +} + +/// Parse the contents of a quoted legacy subsection name. +/// +/// Input starts immediately after the opening quote in `[section "sub"]`. +/// Parsing stops before the closing quote. Backslash escapes copy the escaped +/// byte into an owned buffer; otherwise the returned value borrows from the +/// input. Newlines are rejected. On success, `i` is advanced to the closing +/// quote. +/// NUL bytes are explicitly allowed. 
+fn quoted_sub_section<'i>(i: &mut &'i [u8]) -> ParseResult> { + let mut c = *i; + let input = c; + let mut out: Option> = None; + let mut borrowed_len = 0usize; + while let Some(&b) = c.first() { + match b { + b'"' => break, + b'\n' => return Err(()), + b'\\' => { + let escaped = *c.get(1).ok_or(())?; + if escaped == b'\n' { + return Err(()); + } + let out = out.get_or_insert_with(|| input[..borrowed_len].to_vec()); + out.push(escaped); + c = &c[2..]; + borrowed_len = input.len() - c.len(); + } + _ => { + if let Some(out) = out.as_mut() { + out.push(b); + } + c = &c[1..]; + borrowed_len = input.len() - c.len(); + } + } + } + *i = c; + Ok(match out { + Some(out) => Cow::Owned(out.into()), + None => Cow::Borrowed(input[..borrowed_len].as_bstr()), + }) +} + +/// Parse a config key or value name. +/// +/// Names must start with an ASCII alphabetic byte and may continue with ASCII +/// alphanumeric bytes or `-`. On success, `i` is advanced past the name and the +/// returned value borrows the consumed bytes. +fn config_name<'i>(i: &mut &'i [u8]) -> ParseResult<&'i BStr> { + if !i.first().is_some_and(u8::is_ascii_alphabetic) { + return Err(()); + } + let len = i + .iter() + .take_while(|c| c.is_ascii_alphanumeric() || **c == b'-') + .count(); + let (name, rest) = i.split_at(len); + *i = rest; + Ok(name.as_bstr()) +} + +/// Parse an optional key/value pair in a section body. +/// +/// If a key name is present, this emits [`Event::SectionValueName`], optional +/// whitespace, and then the value events produced by [`config_value`]. If no +/// key name is present, this succeeds without emitting anything. +/// `node` is updated to distinguish name and value parse errors. 
+fn key_value_pair<'i>(i: &mut &'i [u8], node: &mut ParseNode, dispatch: &mut dyn FnMut(Event<'i>)) -> ParseResult<()> { + *node = ParseNode::Name; + let Ok(name) = config_name(i) else { return Ok(()) }; + + dispatch(Event::SectionValueName(section::ValueName(Cow::Borrowed(name)))); + + if let Ok(whitespace) = take_spaces1(i) { + dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } + + *node = ParseNode::Value; + config_value(i, dispatch) +} + +/// Parse the value portion of a key/value pair. +/// +/// If `=` is present, this emits [`Event::KeyValueSeparator`], optional +/// whitespace, and delegates to [`value`]. If `=` is absent, the key is an +/// implicit boolean and an empty [`Event::Value`] is emitted. +fn config_value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> ParseResult<()> { + if let Some(rest) = i.strip_prefix(b"=") { + *i = rest; + dispatch(Event::KeyValueSeparator); + if let Ok(whitespace) = take_spaces1(i) { + dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); + } + value(i, dispatch) + } else { + dispatch(Event::Value(Cow::Borrowed("".into()))); + Ok(()) + } +} + +/// Parse a config value and emit value-related events. +/// +/// Values run until newline, EOF, or an unquoted `;` or `#` comment marker. +/// Double quotes toggle quoted mode for comment handling. Supported escapes are +/// backslash followed by `n`, `t`, `\`, `b`, `"`, LF, or CRLF. Line continuations +/// emit [`Event::ValueNotDone`], the continuation newline, and finally [`Event::ValueDone`]. +/// If the value ends with a trailing backslash at EOF, it is emitted as +/// [`Event::ValueNotDone`] followed directly by an empty [`Event::ValueDone`]. +/// Otherwise a single [`Event::Value`] is emitted with trailing ASCII whitespace +/// trimmed from the logical value. +/// On success, `i` is advanced to the first unconsumed delimiter or EOF. 
+fn value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> ParseResult<()> { + let input = *i; + let mut cursor = 0usize; + let mut value_start = 0usize; + let mut value_end = None; + // While quoted, `;` and `#` remain part of the value instead of starting a comment. + let mut is_in_quotes = false; + // Set after a line continuation so the final chunk is emitted as `ValueDone`. + let mut partial_value_found = false; + + while cursor < input.len() { + match input[cursor] { + b'\n' => { + value_end = Some(cursor); + break; + } + b';' | b'#' if !is_in_quotes => { + value_end = Some(cursor); + break; + } + b'\\' => { + let escape_index = cursor; + cursor += 1; + let mut consumed = 1usize; + let Some(mut b) = input.get(cursor).copied() else { + let value = input[value_start..escape_index].as_bstr(); + dispatch(Event::ValueNotDone(Cow::Borrowed(value))); + dispatch(Event::ValueDone(Cow::Borrowed("".into()))); + *i = &[]; + return Ok(()); + }; + if b == b'\r' { + cursor += 1; + b = *input.get(cursor).ok_or(())?; + if b != b'\n' { + return Err(()); + } + consumed += 1; + } + match b { + b'\n' => { + partial_value_found = true; + let value = input[value_start..escape_index].as_bstr(); + dispatch(Event::ValueNotDone(Cow::Borrowed(value))); + let nl_start = escape_index + 1; + let nl = input[nl_start..nl_start + consumed].as_bstr(); + dispatch(Event::Newline(Cow::Borrowed(nl))); + cursor += 1; + value_start = cursor; + value_end = None; + } + b'n' | b't' | b'\\' | b'b' | b'"' => cursor += 1, + _ => return Err(()), + } + } + b'"' => { + is_in_quotes = !is_in_quotes; + cursor += 1; + } + _ => cursor += 1, + } + } + if is_in_quotes { + return Err(()); + } + + let end = value_end.unwrap_or(cursor); + if end == value_start { + dispatch(Event::Value(Cow::Borrowed("".into()))); + *i = &input[cursor..]; + return Ok(()); + } + + let value_end_no_trailing_whitespace = input[value_start..end] + .iter() + .enumerate() + .rev() + .find_map(|(idx, b)| 
(!b.is_ascii_whitespace()).then_some(value_start + idx + 1)) + .unwrap_or(value_start); + let value = input[value_start..value_end_no_trailing_whitespace].as_bstr(); + if partial_value_found { + dispatch(Event::ValueDone(Cow::Borrowed(value))); + } else { + dispatch(Event::Value(Cow::Borrowed(value))); + } + *i = &input[value_end_no_trailing_whitespace..]; + Ok(()) +} + +/// Parse one or more spaces or horizontal tabs. +/// +/// At least one space or horizontal tab must be present at the current cursor. +/// On success, `i` is advanced past the whitespace run and the returned +/// [`BStr`] borrows the consumed bytes. +fn take_spaces1<'i>(i: &mut &'i [u8]) -> ParseResult<&'i BStr> { + let len = i.iter().take_while(|c| **c == b' ' || **c == b'\t').count(); + if len == 0 { + return Err(()); + } + let (spaces, rest) = i.split_at(len); + *i = rest; + Ok(spaces.as_bstr()) +} + +/// Parse one or more line endings. +/// +/// Both `\n` and `\r\n` are accepted. At least one line ending must be present +/// at the current cursor. On success, `i` is advanced past the newline run and +/// the returned [`BStr`] borrows the consumed bytes. 
+fn take_newlines1<'i>(i: &mut &'i [u8]) -> ParseResult<&'i BStr> { + let mut c = *i; + let input = c; + let mut cursor = 0usize; + while cursor < input.len() { + if input[cursor..].starts_with(b"\r\n") { + cursor += 2; + } else if input[cursor] == b'\n' { + cursor += 1; + } else { + break; + } + } + if cursor == 0 { + return Err(()); + } + c = &input[cursor..]; + *i = c; + Ok(input[..cursor].as_bstr()) +} + +#[cfg(test)] +mod tests; diff --git a/gix-config/src/parse/nom/tests.rs b/gix-config/src/parse/from_bytes/tests.rs similarity index 70% rename from gix-config/src/parse/nom/tests.rs rename to gix-config/src/parse/from_bytes/tests.rs index fbf735a18fc..3f802097378 100644 --- a/gix-config/src/parse/nom/tests.rs +++ b/gix-config/src/parse/from_bytes/tests.rs @@ -1,10 +1,61 @@ use super::*; +trait ParsePeekExt<'a, T> { + fn parse_peek(self, input: &'a [u8]) -> Result<(&'a [u8], T), ()>; + fn parse(self, input: &'a [u8]) -> Result; +} + +impl<'a, T, F> ParsePeekExt<'a, T> for F +where + F: FnOnce(&mut &'a [u8]) -> Result, +{ + fn parse_peek(self, mut input: &'a [u8]) -> Result<(&'a [u8], T), ()> { + let value = self(&mut input)?; + Ok((input, value)) + } + + fn parse(self, input: &'a [u8]) -> Result { + let (remaining, value) = self.parse_peek(input)?; + if remaining.is_empty() { + Ok(value) + } else { + Err(()) + } + } +} + +mod config { + use super::from_bytes; + use crate::parse::tests::util::{name_event, newline_event, value_event, whitespace_event}; + use crate::parse::Event; + + #[test] + fn key_value_before_first_section_is_accepted() { + let mut events = Vec::new(); + from_bytes(b"a = b\n", &mut |event| events.push(event)).unwrap(); + assert_eq!( + events, + vec![ + name_event("a"), + whitespace_event(" "), + Event::KeyValueSeparator, + whitespace_event(" "), + value_event("b"), + newline_event(), + ], + "Git accepts this and reports `a=b`, as git_parse_source() parses alphabetic keys even before any section" + ); + } +} + mod section_headers { - use 
winnow::prelude::*; + use std::borrow::Cow; - use super::section_header; - use crate::parse::tests::util::{fully_consumed, section_header as parsed_section_header}; + use super::{section_header, ParsePeekExt}; + use crate::parse::{ + section, + tests::util::{fully_consumed, section_header as parsed_section_header}, + }; #[test] fn no_subsection() { @@ -14,6 +65,11 @@ mod section_headers { ); } + #[test] + fn empty_section_name_without_quoted_subsection_is_rejected() { + assert!(section_header.parse_peek(b"[]").is_err()); + } + #[test] fn modern_subsection() { assert_eq!( @@ -22,6 +78,27 @@ mod section_headers { ); } + #[test] + fn empty_section_name_with_quoted_subsection_is_accepted() { + assert_eq!( + section_header.parse_peek(br#"[ "core"]"#).unwrap(), + fully_consumed(section::Header { + name: section::Name(Cow::Borrowed("".into())), + separator: Some(Cow::Borrowed(" ".into())), + subsection_name: Some(Cow::Borrowed("core".into())), + }), + "Git accepts this as an empty section name with `core` as subsection, yielding keys like `.core.bare`; gix does this too for compatibility" + ); + } + + #[test] + fn quoted_section_name_without_leading_space_is_rejected() { + assert!( + section_header.parse_peek(br#"["core"]"#).is_err(), + "Git rejects this as a bad config line" + ); + } + #[test] fn escaped_subsection() { assert_eq!( @@ -78,7 +155,11 @@ mod section_headers { #[test] fn null_byt_in_sub_section() { - assert!(section_header.parse_peek(b"[hello \"hello\0\"]").is_err()); + assert_eq!( + section_header.parse_peek(b"[hello \"hello\0\"]").unwrap(), + fully_consumed(parsed_section_header("hello", (" ", "hello\0"))), + "Git accepts this because get_extended_base_var() only rejects newline in quoted subsections" + ); } #[test] @@ -91,6 +172,19 @@ mod section_headers { assert!(section_header.parse_peek(br#"[hello "hello\"#).is_err()); } + #[test] + fn missing_closing_bracket_after_quoted_subsection() { + assert!(section_header.parse_peek(br#"[hello 
"world""#).is_err()); + } + + #[test] + fn whitespace_before_closing_bracket_after_quoted_subsection() { + assert!( + section_header.parse_peek(br#"[hello "world" ]"#).is_err(), + "yes, Git fails here, too!" + ); + } + #[test] fn null_byte_in_header() { assert!(section_header.parse_peek(b"[hello\0]").is_err()); @@ -118,29 +212,25 @@ mod section_headers { mod sub_section { use std::borrow::Cow; - use winnow::prelude::*; - - use super::sub_section; + use super::{quoted_sub_section, ParsePeekExt}; #[test] fn zero_copy_simple() { - let actual = sub_section.parse_peek(br#"name""#).unwrap().1; + let actual = quoted_sub_section.parse_peek(br#"name""#).unwrap().1; assert_eq!(actual.as_ref(), "name"); assert!(matches!(actual, Cow::Borrowed(_))); } #[test] fn escapes_need_allocation() { - let actual = sub_section.parse_peek(br#"\x\t\n\0\\\"""#).unwrap().1; + let actual = quoted_sub_section.parse_peek(br#"\x\t\n\0\\\"""#).unwrap().1; assert_eq!(actual.as_ref(), r#"xtn0\""#); assert!(matches!(actual, Cow::Owned(_))); } } mod config_name { - use winnow::prelude::*; - - use super::config_name; + use super::{config_name, ParsePeekExt}; use crate::parse::tests::util::fully_consumed; #[test] @@ -167,8 +257,6 @@ mod config_name { } mod section { - use winnow::error::InputError; - use crate::parse::{ error::ParseNode, tests::util::{ @@ -179,10 +267,7 @@ mod section { Event, Section, }; - fn section<'a>( - mut i: &'a [u8], - node: &mut ParseNode, - ) -> winnow::ModalResult<(&'a [u8], Section<'a>), InputError<&'a [u8]>> { + fn section<'a>(mut i: &'a [u8], node: &mut ParseNode) -> Result<(&'a [u8], Section<'a>), ()> { let mut header = None; let mut events = Vec::new(); super::section(&mut i, node, &mut |e| match &header { @@ -512,24 +597,20 @@ mod section { mod value_continuation { use bstr::ByteSlice; - use winnow::error::InputError; use crate::parse::{ tests::util::{newline_custom_event, newline_event, value_done_event, value_not_done_event}, Event, }; - pub fn value_impl<'a>( - mut 
i: &'a [u8], - events: &mut Vec>, - ) -> winnow::ModalResult<(&'a [u8], ()), InputError<&'a [u8]>> { - super::value_impl(&mut i, &mut |e| events.push(e)).map(|_| (i, ())) + pub fn value<'a>(mut i: &'a [u8], events: &mut Vec>) -> Result<(&'a [u8], ()), ()> { + super::value(&mut i, &mut |e| events.push(e)).map(|_| (i, ())) } #[test] fn simple_continuation() { let mut events = Vec::new(); - assert_eq!(value_impl(b"hello\\\nworld", &mut events).unwrap().0, b""); + assert_eq!(value(b"hello\\\nworld", &mut events).unwrap().0, b""); assert_eq!( events, vec![ @@ -543,7 +624,7 @@ mod value_continuation { #[test] fn continuation_with_whitespace() { let mut events = Vec::new(); - assert_eq!(value_impl(b"hello\\\n world", &mut events).unwrap().0, b""); + assert_eq!(value(b"hello\\\n world", &mut events).unwrap().0, b""); assert_eq!( events, vec![ @@ -554,7 +635,7 @@ mod value_continuation { ); let mut events = Vec::new(); - assert_eq!(value_impl(b"hello\\\r\n world", &mut events).unwrap().0, b""); + assert_eq!(value(b"hello\\\r\n world", &mut events).unwrap().0, b""); assert_eq!( events, vec![ @@ -566,7 +647,7 @@ mod value_continuation { let mut events = Vec::new(); assert!( - value_impl(b"hello\\\r\r\n world", &mut events).is_err(), + value(b"hello\\\r\r\n world", &mut events).is_err(), r"\r must be followed by \n" ); } @@ -575,7 +656,7 @@ mod value_continuation { fn complex_continuation_with_leftover_comment() { let mut events = Vec::new(); assert_eq!( - value_impl(b"1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut events) + value(b"1 \"\\\"\\\na ; e \"\\\"\\\nd # \"b\t ; c", &mut events) .unwrap() .0, b" # \"b\t ; c" @@ -595,14 +676,14 @@ mod value_continuation { #[test] fn quote_split_over_two_lines_with_leftover_comment() { let mut events = Vec::new(); - assert_eq!(value_impl(b"\"\\\n;\";a", &mut events).unwrap().0, b";a"); + assert_eq!(value(b"\"\\\n;\";a", &mut events).unwrap().0, b";a"); assert_eq!( events, vec![value_not_done_event("\""), newline_event(), 
value_done_event(";\"")] ); let mut events = Vec::new(); - assert_eq!(value_impl(b"\"a\\\r\nb;\";c", &mut events).unwrap().0, b";c"); + assert_eq!(value(b"\"a\\\r\nb;\";c", &mut events).unwrap().0, b";c"); assert_eq!( events, vec![ @@ -617,7 +698,7 @@ mod value_continuation { fn quote_split_over_multiple_lines_without_surrounding_quotes_but_inner_quotes() { let mut events = Vec::new(); assert_eq!( - value_impl( + value( br#"1\ "2" a\ \"3 b\"\ @@ -647,7 +728,7 @@ mod value_continuation { fn quote_split_over_multiple_lines_with_surrounding_quotes() { let mut events = Vec::new(); assert_eq!( - value_impl( + value( br#""1\ "2" a\ \"3 b\"\ @@ -675,45 +756,45 @@ mod value_continuation { } mod value_no_continuation { - use super::value_continuation::value_impl; - use crate::parse::tests::util::value_event; + use super::value_continuation::value; + use crate::parse::tests::util::{value_done_event, value_event, value_not_done_event}; #[test] fn no_comment() { let mut events = Vec::new(); - assert_eq!(value_impl(b"hello", &mut events).unwrap().0, b""); + assert_eq!(value(b"hello", &mut events).unwrap().0, b""); assert_eq!(events, vec![value_event("hello")]); } #[test] fn windows_newline() { let mut events = Vec::new(); - assert_eq!(value_impl(b"hi\r\nrest", &mut events).unwrap().0, b"\r\nrest"); + assert_eq!(value(b"hi\r\nrest", &mut events).unwrap().0, b"\r\nrest"); assert_eq!(events, vec![value_event("hi")]); events.clear(); - assert_eq!(value_impl(b"hi\r\r\r\nrest", &mut events).unwrap().0, b"\r\r\r\nrest"); + assert_eq!(value(b"hi\r\r\r\nrest", &mut events).unwrap().0, b"\r\r\r\nrest"); assert_eq!(events, vec![value_event("hi")]); } #[test] fn no_comment_newline() { let mut events = Vec::new(); - assert_eq!(value_impl(b"hello\na", &mut events).unwrap().0, b"\na"); + assert_eq!(value(b"hello\na", &mut events).unwrap().0, b"\na"); assert_eq!(events, vec![value_event("hello")]); } #[test] fn semicolon_comment_not_consumed() { let mut events = Vec::new(); - 
assert_eq!(value_impl(b"hello;world", &mut events).unwrap().0, b";world"); + assert_eq!(value(b"hello;world", &mut events).unwrap().0, b";world"); assert_eq!(events, vec![value_event("hello")]); } #[test] fn octothorpe_comment_not_consumed() { let mut events = Vec::new(); - assert_eq!(value_impl(b"hello#world", &mut events).unwrap().0, b"#world"); + assert_eq!(value(b"hello#world", &mut events).unwrap().0, b"#world"); assert_eq!(events, vec![value_event("hello")]); } @@ -721,7 +802,7 @@ mod value_no_continuation { fn values_with_extraneous_whitespace_without_comment() { let mut events = Vec::new(); assert_eq!( - value_impl(b"hello ", &mut events).unwrap().0, + value(b"hello ", &mut events).unwrap().0, b" " ); assert_eq!(events, vec![value_event("hello")]); @@ -731,14 +812,14 @@ mod value_no_continuation { fn values_with_extraneous_whitespace_before_comment() { let mut events = Vec::new(); assert_eq!( - value_impl(b"hello #world", &mut events).unwrap().0, + value(b"hello #world", &mut events).unwrap().0, b" #world" ); assert_eq!(events, vec![value_event("hello")]); let mut events = Vec::new(); assert_eq!( - value_impl(b"hello ;world", &mut events).unwrap().0, + value(b"hello ;world", &mut events).unwrap().0, b" ;world" ); assert_eq!(events, vec![value_event("hello")]); @@ -748,52 +829,52 @@ mod value_no_continuation { #[allow(clippy::needless_raw_string_hashes)] fn trans_escaped_comment_marker_not_consumed() { let mut events = Vec::new(); - assert_eq!(value_impl(br##"hello"#"world; a"##, &mut events).unwrap().0, b"; a"); + assert_eq!(value(br##"hello"#"world; a"##, &mut events).unwrap().0, b"; a"); assert_eq!(events, vec![value_event(r##"hello"#"world"##)]); } #[test] fn complex_test() { let mut events = Vec::new(); - assert_eq!(value_impl(br#"value";";ahhhh"#, &mut events).unwrap().0, b";ahhhh"); + assert_eq!(value(br#"value";";ahhhh"#, &mut events).unwrap().0, b";ahhhh"); assert_eq!(events, vec![value_event(r#"value";""#)]); } #[test] fn 
garbage_after_continuation_is_err() { - assert!(value_impl(br"hello \afwjdls", &mut Default::default()).is_err()); + assert!(value(br"hello \afwjdls", &mut Default::default()).is_err()); } #[test] fn invalid_escape() { - assert!(value_impl(br"\x", &mut Default::default()).is_err()); + assert!(value(br"\x", &mut Default::default()).is_err()); } #[test] fn incomplete_quote() { - assert!(value_impl(br#"hello "world"#, &mut Default::default()).is_err()); + assert!(value(br#"hello "world"#, &mut Default::default()).is_err()); } #[test] fn incomplete_escape() { - assert!(value_impl(br"hello world\", &mut Default::default()).is_err()); + let mut events = Vec::new(); + assert_eq!(value(br"hello world\", &mut events).unwrap().0, b""); + assert_eq!( + events, + vec![value_not_done_event("hello world"), value_done_event("")], + "Git accepts this because EOF is normalized to newline and the trailing backslash becomes a continuation" + ); } } mod key_value_pair { - use winnow::error::InputError; - use crate::parse::{ error::ParseNode, tests::util::{name_event, value_event, whitespace_event}, Event, }; - fn key_value<'a>( - mut i: &'a [u8], - node: &mut ParseNode, - events: &mut Vec>, - ) -> winnow::ModalResult<(&'a [u8], ()), InputError<&'a [u8]>> { + fn key_value<'a>(mut i: &'a [u8], node: &mut ParseNode, events: &mut Vec>) -> Result<(&'a [u8], ()), ()> { super::key_value_pair(&mut i, node, &mut |e| events.push(e)).map(|_| (i, ())) } @@ -851,10 +932,146 @@ mod key_value_pair { } } -mod comment { - use winnow::prelude::*; +mod value { + use super::value; + use crate::parse::{ + tests::util::{newline_custom_event, newline_event, value_done_event, value_event, value_not_done_event}, + Event, + }; + + fn parse(mut input: &[u8]) -> Result<(&[u8], Vec>), ()> { + let mut events = Vec::new(); + value(&mut input, &mut |event| events.push(event))?; + Ok((input, events)) + } + + #[test] + fn empty_value() { + let (remaining, events) = parse(b"").unwrap(); + assert_eq!(remaining, b""); + 
assert_eq!(events, vec![value_event("")]); + } - use super::comment; + #[test] + fn plain_value_runs_until_eof_and_trims_trailing_whitespace() { + let (remaining, events) = parse(b"hello \t").unwrap(); + assert_eq!(remaining, b" \t"); + assert_eq!(events, vec![value_event("hello")]); + } + + #[test] + fn newline_without_backslash_is_not_a_continuation() { + let (remaining, events) = parse(b"config\n value").unwrap(); + assert_eq!(remaining, b"\n value"); + assert_eq!(events, vec![value_event("config")]); + } + + #[test] + fn unquoted_comment_markers_end_the_value() { + let (remaining, events) = parse(b"hello ;comment").unwrap(); + assert_eq!(remaining, b" ;comment"); + assert_eq!(events, vec![value_event("hello")]); + + let (remaining, events) = parse(b"hello #comment").unwrap(); + assert_eq!(remaining, b" #comment"); + assert_eq!(events, vec![value_event("hello")]); + } + + #[test] + fn quoted_comment_markers_remain_part_of_the_value() { + let (remaining, events) = parse(br#""a;b#c";comment"#).unwrap(); + assert_eq!(remaining, b";comment"); + assert_eq!(events, vec![value_event(r#""a;b#c""#)]); + } + + #[test] + fn allowed_escapes_remain_part_of_the_value() { + let (remaining, events) = parse(br#"\n\t\\\b\""#).unwrap(); + assert_eq!(remaining, b""); + assert_eq!(events, vec![value_event(r#"\n\t\\\b\""#)]); + } + + #[test] + fn backslash_lf_continuation_is_allowed() { + let (remaining, events) = parse(b"hello\\\n world").unwrap(); + assert_eq!(remaining, b""); + assert_eq!( + events, + vec![ + value_not_done_event("hello"), + newline_event(), + value_done_event(" world") + ] + ); + } + + #[test] + fn backslash_crlf_continuation_is_allowed() { + let (remaining, events) = parse(b"hello\\\r\n world").unwrap(); + assert_eq!(remaining, b""); + assert_eq!( + events, + vec![ + value_not_done_event("hello"), + newline_custom_event("\r\n"), + value_done_event(" world") + ] + ); + } + + #[test] + fn continuations_keep_quote_state() { + let (remaining, events) = parse( + 
br#""a\ +;b";comment"#, + ) + .unwrap(); + assert_eq!(remaining, b";comment"); + assert_eq!( + events, + vec![value_not_done_event("\"a"), newline_event(), value_done_event(";b\"")] + ); + } + + #[test] + fn backslash_cr_without_lf_is_rejected() { + assert!(parse(b"hello\\\r world").is_err()); + } + + #[test] + fn backslash_cr_crlf_is_rejected() { + assert!(parse(b"hello\\\r\r\n world").is_err()); + } + + #[test] + fn trailing_backslash_is_accepted_as_continuation_to_eof() { + let (remaining, events) = parse(b"hello\\").unwrap(); + assert_eq!( + remaining, b"", + "it consumes everything, as the continuation backslash is no value" + ); + assert_eq!( + events, + vec![value_not_done_event("hello"), value_done_event("")], + "Git accepts this because get_next_char() maps EOF to newline, which parse_value() treats as a continuation" + ); + } + + #[test] + fn unsupported_escapes_are_rejected() { + assert!(parse(br"\a").is_err()); + assert!(parse(br"\x").is_err()); + assert!(parse(b"\\\0").is_err()); + } + + #[test] + fn unterminated_quote_is_rejected() { + assert!(parse(br#""hello"#).is_err()); + } +} + +mod comment { + use super::{comment, ParsePeekExt}; use crate::parse::tests::util::{comment as parsed_comment, fully_consumed}; #[test] diff --git a/gix-config/src/parse/mod.rs b/gix-config/src/parse/mod.rs index 1f7713ece6b..50b1389c486 100644 --- a/gix-config/src/parse/mod.rs +++ b/gix-config/src/parse/mod.rs @@ -14,8 +14,9 @@ use std::{borrow::Cow, hash::Hash}; use bstr::BStr; -mod nom; -pub use self::nom::from_bytes; +mod from_bytes; +pub use self::from_bytes::from_bytes; + mod event; #[path = "events.rs"] mod events_type; @@ -63,8 +64,10 @@ pub enum Event<'a> { Newline(Cow<'a, BStr>), /// Any value that isn't completed. This occurs when the value is continued /// onto the next line by ending it with a backslash. - /// A [`Newline`][Self::Newline] event is guaranteed after, followed by - /// either a ValueDone, a Whitespace, or another ValueNotDone. 
+ /// A [`Newline`][Self::Newline] event usually follows, followed by either + /// `ValueDone`, `Whitespace`, or another `ValueNotDone`. The exception is a + /// trailing backslash at EOF, which Git accepts as a continuation and which + /// is represented by `ValueNotDone` followed directly by `ValueDone`. ValueNotDone(Cow<'a, BStr>), /// The last line of a value which was continued onto another line. /// With this it's possible to obtain the complete value by concatenating diff --git a/gix-config/src/parse/nom/mod.rs b/gix-config/src/parse/nom/mod.rs deleted file mode 100644 index c5b3c27ca9a..00000000000 --- a/gix-config/src/parse/nom/mod.rs +++ /dev/null @@ -1,377 +0,0 @@ -use std::borrow::Cow; - -use bstr::{BStr, ByteSlice}; -use winnow::{ - combinator::{alt, delimited, opt, preceded, repeat}, - error::{ErrMode, InputError as NomError, ParserError as _}, - prelude::*, - stream::Offset as _, - token::{one_of, take_till, take_while}, -}; - -use crate::parse::{error::ParseNode, section, Comment, Error, Event}; - -/// Attempt to zero-copy parse the provided bytes, passing results to `dispatch`. -pub fn from_bytes<'i>(mut input: &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> Result<(), Error> { - let start = input.checkpoint(); - - let bom = unicode_bom::Bom::from(input); - input.next_slice(bom.len()); - - repeat( - 0.., - alt(( - comment.map(Event::Comment), - take_spaces1.map(|whitespace| Event::Whitespace(Cow::Borrowed(whitespace))), - |i: &mut &'i [u8]| { - let newline = take_newlines1.parse_next(i)?; - let o = Event::Newline(Cow::Borrowed(newline)); - Ok(o) - }, - )), - ) - .fold(|| (), |_acc, event| dispatch(event)) - .parse_next(&mut input) - // I don't think this can panic. many0 errors if the child parser returns - // a success where the input was not consumed, but alt will only return Ok - // if one of its children succeed. 
However, all of it's children are - // guaranteed to consume something if they succeed, so the Ok(i) == i case - // can never occur. - .expect("many0(alt(...)) panicked. Likely a bug in one of the children parsers."); - - if input.is_empty() { - return Ok(()); - } - - let mut node = ParseNode::SectionHeader; - - let res = repeat(1.., |i: &mut &'i [u8]| section(i, &mut node, dispatch)) - .map(|()| ()) - .parse_next(&mut input); - res.map_err(|_| { - let newlines = newlines_from(input, start); - Error { - line_number: newlines, - last_attempted_parser: node, - parsed_until: input.as_bstr().into(), - } - })?; - - // This needs to happen after we collect sections, otherwise the line number - // will be off. - if !input.is_empty() { - let newlines = newlines_from(input, start); - return Err(Error { - line_number: newlines, - last_attempted_parser: node, - parsed_until: input.as_bstr().into(), - }); - } - - Ok(()) -} - -fn newlines_from(input: &[u8], start: winnow::stream::Checkpoint<&[u8], &[u8]>) -> usize { - let offset = input.offset_from(&start); - let mut start_input = input; - start_input.reset(&start); - start_input.next_slice(offset).iter().filter(|c| **c == b'\n').count() -} - -fn comment<'i>(i: &mut &'i [u8]) -> ModalResult, NomError<&'i [u8]>> { - ( - one_of([';', '#']), - take_till(0.., |c| c == b'\n').map(|text: &[u8]| Cow::Borrowed(text.as_bstr())), - ) - .map(|(tag, text)| Comment { tag, text }) - .parse_next(i) -} - -#[cfg(test)] -mod tests; - -fn section<'i>( - i: &mut &'i [u8], - node: &mut ParseNode, - dispatch: &mut dyn FnMut(Event<'i>), -) -> ModalResult<(), NomError<&'i [u8]>> { - let start = i.checkpoint(); - let header = section_header(i).inspect_err(|_err| { - i.reset(&start); - })?; - dispatch(Event::SectionHeader(header)); - - // This would usually be a many0(alt(...)), the manual loop allows us to - // optimize vec insertions - loop { - let start = i.checkpoint(); - - if let Some(v) = opt(take_spaces1).parse_next(i)? 
{ - dispatch(Event::Whitespace(Cow::Borrowed(v.as_bstr()))); - } - - if let Some(v) = opt(take_newlines1).parse_next(i)? { - dispatch(Event::Newline(Cow::Borrowed(v.as_bstr()))); - } - - key_value_pair(i, node, dispatch)?; - - if let Some(comment) = opt(comment).parse_next(i)? { - dispatch(Event::Comment(comment)); - } - - if i.offset_from(&start) == 0 { - break; - } - } - - Ok(()) -} - -fn section_header<'i>(i: &mut &'i [u8]) -> ModalResult, NomError<&'i [u8]>> { - // No spaces must be between section name and section start - let name = preceded('[', take_while(1.., is_section_char).map(bstr::ByteSlice::as_bstr)).parse_next(i)?; - - if opt(one_of::<_, _, ErrMode>>(']')) - .parse_next(i)? - .is_some() - { - // Either section does not have a subsection or using deprecated - // subsection syntax at this point. - let header = match memchr::memrchr(b'.', name.as_bytes()) { - Some(index) => section::Header { - name: section::Name(Cow::Borrowed(name[..index].as_bstr())), - separator: name.get(index..=index).map(|s| Cow::Borrowed(s.as_bstr())), - subsection_name: name.get(index + 1..).map(|s| Cow::Borrowed(s.as_bstr())), - }, - None => section::Header { - name: section::Name(Cow::Borrowed(name.as_bstr())), - separator: None, - subsection_name: None, - }, - }; - - if header.name.is_empty() { - return Err(winnow::error::ErrMode::from_input(i)); - } - return Ok(header); - } - - // Section header must be using modern subsection syntax at this point. - (take_spaces1, delimited('"', opt(sub_section), "\"]")) - .map(|(whitespace, subsection_name)| section::Header { - name: section::Name(Cow::Borrowed(name)), - separator: Some(Cow::Borrowed(whitespace)), - subsection_name, - }) - .parse_next(i) -} - -fn is_section_char(c: u8) -> bool { - c.is_ascii_alphanumeric() || c == b'-' || c == b'.' -} - -fn sub_section<'i>(i: &mut &'i [u8]) -> ModalResult, NomError<&'i [u8]>> { - let mut output = Cow::Borrowed(Default::default()); - if let Some(sub) = opt(subsection_subset).parse_next(i)? 
{ - output = Cow::Borrowed(sub.as_bstr()); - } - while let Some(sub) = opt(subsection_subset).parse_next(i)? { - output.to_mut().extend(sub); - } - - Ok(output) -} - -fn subsection_subset<'i>(i: &mut &'i [u8]) -> ModalResult<&'i [u8], NomError<&'i [u8]>> { - alt((subsection_unescaped, subsection_escaped_char)).parse_next(i) -} - -fn subsection_unescaped<'i>(i: &mut &'i [u8]) -> ModalResult<&'i [u8], NomError<&'i [u8]>> { - take_while(1.., is_subsection_unescaped_char).parse_next(i) -} - -fn subsection_escaped_char<'i>(i: &mut &'i [u8]) -> ModalResult<&'i [u8], NomError<&'i [u8]>> { - preceded('\\', one_of(is_subsection_escapable_char).take()).parse_next(i) -} - -fn is_subsection_escapable_char(c: u8) -> bool { - c != b'\n' -} - -fn is_subsection_unescaped_char(c: u8) -> bool { - c != b'"' && c != b'\\' && c != b'\n' && c != 0 -} - -fn key_value_pair<'i>( - i: &mut &'i [u8], - node: &mut ParseNode, - dispatch: &mut dyn FnMut(Event<'i>), -) -> ModalResult<(), NomError<&'i [u8]>> { - *node = ParseNode::Name; - if let Some(name) = opt(config_name).parse_next(i)? { - dispatch(Event::SectionValueName(section::ValueName(Cow::Borrowed(name)))); - - if let Some(whitespace) = opt(take_spaces1).parse_next(i)? { - dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); - } - - *node = ParseNode::Value; - config_value(i, dispatch) - } else { - Ok(()) - } -} - -/// Parses the config name of a config pair. Assumes the input has already been -/// trimmed of any leading whitespace. 
-fn config_name<'i>(i: &mut &'i [u8]) -> ModalResult<&'i BStr, NomError<&'i [u8]>> { - ( - one_of(|c: u8| c.is_ascii_alphabetic()), - take_while(0.., |c: u8| c.is_ascii_alphanumeric() || c == b'-'), - ) - .take() - .map(bstr::ByteSlice::as_bstr) - .parse_next(i) -} - -fn config_value<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> ModalResult<(), NomError<&'i [u8]>> { - if opt('=').parse_next(i)?.is_some() { - dispatch(Event::KeyValueSeparator); - if let Some(whitespace) = opt(take_spaces1).parse_next(i)? { - dispatch(Event::Whitespace(Cow::Borrowed(whitespace))); - } - value_impl(i, dispatch) - } else { - // This is a special way of denoting 'empty' values which a lot of code depends on. - // Hence, rather to fix this everywhere else, leave it here and fix it where it matters, namely - // when it's about differentiating between a missing key-value separator, and one followed by emptiness. - dispatch(Event::Value(Cow::Borrowed("".into()))); - Ok(()) - } -} - -/// Handles parsing of known-to-be values. This function handles both single -/// line values as well as values that are continuations. -fn value_impl<'i>(i: &mut &'i [u8], dispatch: &mut dyn FnMut(Event<'i>)) -> ModalResult<(), NomError<&'i [u8]>> { - let start_checkpoint = i.checkpoint(); - let mut value_start_checkpoint = i.checkpoint(); - let mut value_end = None; - - // This is required to ignore comment markers if they're in a quote. 
- let mut is_in_quotes = false; - // Used to determine if we return a Value or Value{Not,}Done - let mut partial_value_found = false; - - loop { - let _ = take_while(0.., |c| !matches!(c, b'\n' | b'\\' | b'"' | b';' | b'#')).parse_next(i)?; - if let Some(c) = i.next_token() { - match c { - b'\n' => { - value_end = Some(i.offset_from(&value_start_checkpoint) - 1); - break; - } - b';' | b'#' if !is_in_quotes => { - value_end = Some(i.offset_from(&value_start_checkpoint) - 1); - break; - } - b'\\' => { - let escaped_index = i.offset_from(&value_start_checkpoint); - let escape_index = escaped_index - 1; - let Some(mut c) = i.next_token() else { - i.reset(&start_checkpoint); - return Err(winnow::error::ErrMode::from_input(i)); - }; - let mut consumed = 1; - if c == b'\r' { - c = i.next_token().ok_or_else(|| { - i.reset(&start_checkpoint); - winnow::error::ErrMode::from_input(i) - })?; - if c != b'\n' { - i.reset(&start_checkpoint); - return Err(winnow::error::ErrMode::from_input(i)); - } - consumed += 1; - } - - match c { - b'\n' => { - partial_value_found = true; - - i.reset(&value_start_checkpoint); - - let value = i.next_slice(escape_index).as_bstr(); - dispatch(Event::ValueNotDone(Cow::Borrowed(value))); - - i.next_token(); - - let nl = i.next_slice(consumed).as_bstr(); - dispatch(Event::Newline(Cow::Borrowed(nl))); - - value_start_checkpoint = i.checkpoint(); - value_end = None; - } - b'n' | b't' | b'\\' | b'b' | b'"' => {} - _ => { - i.reset(&start_checkpoint); - return Err(winnow::error::ErrMode::from_input(i)); - } - } - } - b'"' => is_in_quotes = !is_in_quotes, - _ => {} - } - } else { - break; - } - } - if is_in_quotes { - i.reset(&start_checkpoint); - return Err(winnow::error::ErrMode::from_input(i)); - } - - let value_end = match value_end { - None => { - let last_value_index = i.offset_from(&value_start_checkpoint); - if last_value_index == 0 { - dispatch(Event::Value(Cow::Borrowed("".into()))); - return Ok(()); - } else { - last_value_index - } - } - 
Some(idx) => idx, - }; - - i.reset(&value_start_checkpoint); - let value_end_no_trailing_whitespace = i[..value_end] - .iter() - .enumerate() - .rev() - .find_map(|(idx, b)| (!b.is_ascii_whitespace()).then_some(idx + 1)) - .unwrap_or(0); - let remainder_value = i.next_slice(value_end_no_trailing_whitespace); - - if partial_value_found { - dispatch(Event::ValueDone(Cow::Borrowed(remainder_value.as_bstr()))); - } else { - dispatch(Event::Value(Cow::Borrowed(remainder_value.as_bstr()))); - } - - Ok(()) -} - -fn take_spaces1<'i>(i: &mut &'i [u8]) -> ModalResult<&'i BStr, NomError<&'i [u8]>> { - take_while(1.., winnow::stream::AsChar::is_space) - .map(bstr::ByteSlice::as_bstr) - .parse_next(i) -} - -fn take_newlines1<'i>(i: &mut &'i [u8]) -> ModalResult<&'i BStr, NomError<&'i [u8]>> { - repeat(1..1024, alt(("\r\n", "\n"))) - .map(|()| ()) - .take() - .map(bstr::ByteSlice::as_bstr) - .parse_next(i) -} diff --git a/gix-config/src/parse/tests.rs b/gix-config/src/parse/tests.rs index 703f8a2167a..f566c46c9e4 100644 --- a/gix-config/src/parse/tests.rs +++ b/gix-config/src/parse/tests.rs @@ -103,6 +103,65 @@ mod section { ); assert!(!invalid.is_legacy()); } + + #[test] + fn empty_section_name_with_quoted_subsection() { + let header = header("", Some((" ", "core"))); + let mut out = Vec::new(); + header.write_to(&mut out).unwrap(); + assert_eq!( + out, br#"[ "core"]"#, + "Git accepts this as an empty section name with `core` as subsection, and we keep it" + ); + assert!(!header.is_legacy()); + } + + #[test] + fn nul_byte_in_quoted_subsection() { + let header = header("hello", Some((" ", "hello\0"))); + let mut out = Vec::new(); + header.write_to(&mut out).unwrap(); + assert_eq!( + out, b"[hello \"hello\0\"]", + "Git accepts NUL bytes in quoted subsection names, and we preserve them" + ); + assert!(!header.is_legacy()); + } + } + } +} + +mod event { + mod write_to { + use crate::parse::Events; + + fn write_events(input: &str) -> Vec { + let events = 
Events::from_str(input).unwrap().into_vec(); + let mut out = Vec::new(); + for event in &events { + event.write_to(&mut out).unwrap(); + } + out + } + + #[test] + fn key_value_before_first_section() { + let input = "a = b\n"; + assert_eq!( + write_events(input), + input.as_bytes(), + "Git accepts key/value pairs before the first section, and we preserve them" + ); + } + + #[test] + fn value_with_trailing_backslash_at_eof() { + let input = "[core]\na=hello\\"; + assert_eq!( + write_events(input), + input.as_bytes(), + "Git accepts EOF as a line continuation terminator, and we preserve the original trailing backslash" + ); } } } diff --git a/gix-config/src/types.rs b/gix-config/src/types.rs index 8aa1566aa47..f41e9006b12 100644 --- a/gix-config/src/types.rs +++ b/gix-config/src/types.rs @@ -103,9 +103,10 @@ pub enum Source { /// [`raw_value()`]: Self::raw_value #[derive(Eq, Clone, Debug, Default)] pub struct File<'event> { - /// The list of events that occur before any section. Since a - /// `git-config` file prohibits global values, this vec is limited to only - /// comment, newline, and whitespace events. + /// The list of events that occur before any section. + /// + /// Git accepts global properties before the first section, so this may also + /// contain key/value events. pub(crate) frontmatter_events: crate::parse::FrontMatterEvents<'event>, /// Frontmatter events to be placed after the given section. 
pub(crate) frontmatter_post_section: HashMap>, diff --git a/gix-config/tests/config/file/access/raw/raw_value.rs b/gix-config/tests/config/file/access/raw/raw_value.rs index 55b923ff69b..a0ed095507b 100644 --- a/gix-config/tests/config/file/access/raw/raw_value.rs +++ b/gix-config/tests/config/file/access/raw/raw_value.rs @@ -8,6 +8,17 @@ fn single_section() -> crate::Result { Ok(()) } +#[test] +fn global_property_uses_empty_section_name() -> crate::Result { + let config = File::try_from("a=b\n[core]\na=c")?; + assert_eq!( + config.raw_value_by("", None, "a").unwrap_err().to_string(), + "The requested section does not exist", + "these are not readable because the supporting this adds a lot of complexity" + ); + Ok(()) +} + #[test] fn last_one_wins_respected_in_section() -> crate::Result { let config = File::try_from("[core]\na=b\na=d")?; diff --git a/gix-config/tests/config/file/access/raw/set_existing_raw_value.rs b/gix-config/tests/config/file/access/raw/set_existing_raw_value.rs index d369f8b6f89..66d045fe6cd 100644 --- a/gix-config/tests/config/file/access/raw/set_existing_raw_value.rs +++ b/gix-config/tests/config/file/access/raw/set_existing_raw_value.rs @@ -22,6 +22,18 @@ fn single_line() { assert_set_value("hello world"); } +#[test] +fn global_property_uses_empty_section_name() -> crate::Result { + let mut file = file("a=b\n[core]\na=c"); + let err = file.set_existing_raw_value_by("", None, "a", "d").unwrap_err(); + assert_eq!( + err.to_string(), + "The requested section does not exist", + "cannot set global values" + ); + Ok(()) +} + #[test] fn starts_with_whitespace() { assert_set_value("\ta"); diff --git a/gix-imara-diff/Cargo.toml b/gix-imara-diff/Cargo.toml index 1dea43e7813..f124bab22a2 100644 --- a/gix-imara-diff/Cargo.toml +++ b/gix-imara-diff/Cargo.toml @@ -18,7 +18,6 @@ include = ["/src/**/*", "/LICENSE", "/README.md", "!/src/tests.rs"] [dependencies] bstr = { version = "1.12.0", default-features = false } hashbrown = { version = ">=0.15,<=0.16", 
default-features = false, features = ["default-hasher", "inline-more"] } -memchr = "2.7.4" [features] default = [] diff --git a/gix-imara-diff/src/sources.rs b/gix-imara-diff/src/sources.rs index 5ee1abdf4a1..9b2d5ea0d1d 100644 --- a/gix-imara-diff/src/sources.rs +++ b/gix-imara-diff/src/sources.rs @@ -8,7 +8,7 @@ use std::str::from_utf8_unchecked; -use memchr::memchr; +use bstr::ByteSlice; use crate::TokenSource; @@ -175,7 +175,7 @@ impl<'a> Iterator for BStrLines<'a> { if self.0.is_empty() { return None; } - let line_len = memchr(b'\n', self.0).map_or(self.0.len(), |len| len + 1); + let line_len = self.0.find_byte(b'\n').map_or(self.0.len(), |len| len + 1); let (line, rem) = self.0.split_at(line_len); self.0 = rem.into(); Some(line.into()) @@ -208,7 +208,7 @@ impl<'a> Iterator for ByteLines<'a> { if self.0.is_empty() { return None; } - let line_len = memchr(b'\n', self.0).map_or(self.0.len(), |len| len + 1); + let line_len = self.0.find_byte(b'\n').map_or(self.0.len(), |len| len + 1); let (line, rem) = self.0.split_at(line_len); self.0 = rem; Some(line) diff --git a/gix-mailmap/fuzz/fuzz_targets/mailmap.rs b/gix-mailmap/fuzz/fuzz_targets/mailmap.rs index 1450f55435b..11cdc524c81 100644 --- a/gix-mailmap/fuzz/fuzz_targets/mailmap.rs +++ b/gix-mailmap/fuzz/fuzz_targets/mailmap.rs @@ -27,7 +27,7 @@ fn resolve(snapshot: &Snapshot, input: &[u8]) { b"Jane 1 +0000".as_slice(), b"Unknown 1 +0000".as_slice(), ] { - let Ok(signature) = SignatureRef::from_bytes::<()>(candidate) else { + let Ok(signature) = SignatureRef::from_bytes(candidate) else { continue; }; _ = black_box(snapshot.try_resolve_ref(signature)); diff --git a/gix-mailmap/src/lib.rs b/gix-mailmap/src/lib.rs index 11501001499..260dd3775e9 100644 --- a/gix-mailmap/src/lib.rs +++ b/gix-mailmap/src/lib.rs @@ -18,7 +18,7 @@ //! //! let snapshot = gix_mailmap::Snapshot::new(parsed); //! let resolved = snapshot.resolve( -//! SignatureRef::from_bytes::<()>(b"Jane 1711398853 +0800").unwrap(), +//! 
SignatureRef::from_bytes(b"Jane 1711398853 +0800").unwrap(), //! ); //! //! assert_eq!(resolved.name, "Jane Doe"); diff --git a/gix-object/Cargo.toml b/gix-object/Cargo.toml index 66401014432..8840c02fb4c 100644 --- a/gix-object/Cargo.toml +++ b/gix-object/Cargo.toml @@ -36,12 +36,6 @@ serde = [ "gix-hash/serde", "gix-actor/serde", ] -## When parsing objects by default errors will only be available on the granularity of success or failure, and with the above flag enabled -## details information about the error location will be collected. -## Use it in applications which expect broken or invalid objects or for debugging purposes. Incorrectly formatted objects aren't at all -## common otherwise. -verbose-object-parsing-errors = ["winnow/std"] - [dependencies] gix-features = { version = "^0.47.0", path = "../gix-features", features = [ "progress", @@ -59,7 +53,6 @@ bstr = { version = "1.12.0", default-features = false, features = [ "std", "unicode", ] } -winnow = { version = "1.0.0", features = ["simd"] } smallvec = { version = "1.15.1", features = ["write"] } serde = { version = "1.0.114", optional = true, default-features = false, features = [ "derive", diff --git a/gix-object/benches/decode_objects.rs b/gix-object/benches/decode_objects.rs index 6a340b00933..8bd51bcd901 100644 --- a/gix-object/benches/decode_objects.rs +++ b/gix-object/benches/decode_objects.rs @@ -3,19 +3,29 @@ use std::hint::black_box; fn parse_commit(c: &mut Criterion) { c.bench_function("CommitRef(sig)", |b| { - b.iter(|| black_box(gix_object::CommitRef::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS)).unwrap()); + b.iter(|| { + black_box(gix_object::CommitRef::from_bytes( + COMMIT_WITH_MULTI_LINE_HEADERS, + gix_hash::Kind::Sha1, + )) + .unwrap() + }); }); c.bench_function("CommitRefIter(sig)", |b| { - b.iter(|| black_box(gix_object::CommitRefIter::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS).count())); + b.iter(|| { + black_box( + gix_object::CommitRefIter::from_bytes(COMMIT_WITH_MULTI_LINE_HEADERS, 
gix_hash::Kind::Sha1).count(), + ) + }); }); } fn parse_tag(c: &mut Criterion) { c.bench_function("TagRef(sig)", |b| { - b.iter(|| black_box(gix_object::TagRef::from_bytes(TAG_WITH_SIGNATURE)).unwrap()); + b.iter(|| black_box(gix_object::TagRef::from_bytes(TAG_WITH_SIGNATURE, gix_hash::Kind::Sha1)).unwrap()); }); c.bench_function("TagRefIter(sig)", |b| { - b.iter(|| black_box(gix_object::TagRefIter::from_bytes(TAG_WITH_SIGNATURE).count())); + b.iter(|| black_box(gix_object::TagRefIter::from_bytes(TAG_WITH_SIGNATURE, gix_hash::Kind::Sha1).count())); }); } diff --git a/gix-object/fuzz/fuzz_targets/fuzz_commit.rs b/gix-object/fuzz/fuzz_targets/fuzz_commit.rs index b8f3fe1ff37..821e93c8749 100644 --- a/gix-object/fuzz/fuzz_targets/fuzz_commit.rs +++ b/gix-object/fuzz/fuzz_targets/fuzz_commit.rs @@ -3,6 +3,8 @@ use libfuzzer_sys::fuzz_target; use std::hint::black_box; fuzz_target!(|commit: &[u8]| { - _ = black_box(gix_object::CommitRef::from_bytes(commit)); - _ = black_box(gix_object::CommitRefIter::from_bytes(commit)).count(); + _ = black_box(gix_object::CommitRef::from_bytes(commit, gix_hash::Kind::Sha1)); + _ = black_box(gix_object::CommitRefIter::from_bytes(commit, gix_hash::Kind::Sha1)).count(); + _ = black_box(gix_object::CommitRef::from_bytes(commit, gix_hash::Kind::Sha256)); + _ = black_box(gix_object::CommitRefIter::from_bytes(commit, gix_hash::Kind::Sha256)).count(); }); diff --git a/gix-object/fuzz/fuzz_targets/fuzz_tag.rs b/gix-object/fuzz/fuzz_targets/fuzz_tag.rs index 34135d4097b..c3bb319218e 100644 --- a/gix-object/fuzz/fuzz_targets/fuzz_tag.rs +++ b/gix-object/fuzz/fuzz_targets/fuzz_tag.rs @@ -4,6 +4,8 @@ use libfuzzer_sys::fuzz_target; use std::hint::black_box; fuzz_target!(|tag: &[u8]| { - _ = black_box(gix_object::TagRef::from_bytes(tag)); - _ = black_box(gix_object::TagRefIter::from_bytes(tag).count()); + _ = black_box(gix_object::TagRef::from_bytes(tag, gix_hash::Kind::Sha1)); + _ = black_box(gix_object::TagRefIter::from_bytes(tag, 
gix_hash::Kind::Sha1).count()); + _ = black_box(gix_object::TagRef::from_bytes(tag, gix_hash::Kind::Sha256)); + _ = black_box(gix_object::TagRefIter::from_bytes(tag, gix_hash::Kind::Sha256).count()); }); diff --git a/gix-object/src/commit/decode.rs b/gix-object/src/commit/decode.rs index f68b82b6aba..dfd4798d9d9 100644 --- a/gix-object/src/commit/decode.rs +++ b/gix-object/src/commit/decode.rs @@ -1,74 +1,98 @@ use std::borrow::Cow; use smallvec::SmallVec; -use winnow::{ - combinator::{alt, eof, opt, preceded, repeat, terminated}, - error::{AddContext, ParserError, StrContext}, - prelude::*, - token::{rest, take_till}, -}; -use crate::{parse, parse::NL, BStr, ByteSlice, CommitRef}; +use crate::{parse, parse::ParseResult, BStr, ByteSlice, CommitRef}; -pub fn message<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - i: &mut &'a [u8], -) -> ModalResult<&'a BStr, E> { - if i.is_empty() { - // newline + [message] - let start = i.checkpoint(); - return Err(winnow::error::ErrMode::from_input(i).add_context( - i, - &start, - StrContext::Expected("newline + ".into()), - )); +/// Parse the commit message after the header/message separator. +/// +/// Typical input starts with the blank-line separator before the message, for +/// example `\nsubject\n\nbody\n`. The returned message excludes that first +/// separator newline and borrows all remaining bytes from `i`. +/// +/// On success, `i` is advanced to the empty suffix as commits end with a message. 
+pub fn message<'a>(i: &mut &'a [u8]) -> ParseResult<&'a BStr> { + if let Some(rest) = i.strip_prefix(parse::NL) { + *i = &[]; + Ok(rest.as_bstr()) + } else { + Err(crate::decode::Error) } - preceded(NL, rest.map(ByteSlice::as_bstr)) - .context(StrContext::Expected( - "a newline separates headers from the message".into(), - )) - .parse_next(i) } -pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - i: &mut &'a [u8], -) -> ModalResult, E> { - ( - (|i: &mut _| parse::header_field(i, b"tree", parse::hex_hash)) - .context(StrContext::Expected("tree <40 lowercase hex char>".into())), - repeat(0.., |i: &mut _| parse::header_field(i, b"parent", parse::hex_hash)) - .map(|p: Vec<_>| p) - .context(StrContext::Expected( - "zero or more 'parent <40 lowercase hex char>'".into(), - )), - (|i: &mut _| parse::header_field(i, b"author", parse::signature_and_consumed).map(|(_signature, raw)| raw)) - .context(StrContext::Expected("author ".into())), - (|i: &mut _| parse::header_field(i, b"committer", parse::signature_and_consumed).map(|(_signature, raw)| raw)) - .context(StrContext::Expected("committer ".into())), - opt(|i: &mut _| parse::header_field(i, b"encoding", take_till(0.., NL))) - .context(StrContext::Expected("encoding ".into())), - repeat( - 0.., - alt(( - parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), - |i: &mut _| { - parse::any_header_field(i, take_till(0.., NL)) - .map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr()))) - }, - )), - ) - .context(StrContext::Expected(" ".into())), - terminated(message, eof), - ) - .map( - |(tree, parents, author, committer, encoding, extra_headers, message)| CommitRef { - tree, - parents: SmallVec::from(parents), - author, - committer, - encoding: encoding.map(ByteSlice::as_bstr), - message, - extra_headers, - }, - ) - .parse_next(i) +/// Parse a complete commit object body. 
+/// +/// Typical input starts with `tree \n`, followed by zero or more +/// `parent \n` headers, then `author \n` and +/// `committer \n`. An optional `encoding \n` header and any +/// number of extra single-line or multi-line headers may follow. The headers +/// are terminated by a blank line, after which all remaining bytes are the +/// commit message. +/// +/// On success, the returned [`CommitRef`] borrows fields from `i` where +/// possible, and `i` is advanced to the empty suffix. Extra single-line header +/// values are borrowed, while multi-line header values are unfolded into owned +/// buffers. +/// +/// This parser is not transactional as a whole: if a later required field or +/// the final message parse fails, `i` may already have been advanced past +/// earlier successfully parsed fields. +pub fn commit<'a>(i: &mut &'a [u8], hash_kind: gix_hash::Kind) -> ParseResult> { + let tree = parse::header_field(i, b"tree", |value| parse::hex_hash(value, hash_kind))?; + + let mut parents = SmallVec::new(); + loop { + let before = *i; + match parse::header_field(i, b"parent", |value| parse::hex_hash(value, hash_kind)) { + Ok(parent) => parents.push(parent), + Err(_) => { + *i = before; + break; + } + } + } + + let author = parse::header_field(i, b"author", parse::signature_raw)?; + let committer = parse::header_field(i, b"committer", parse::signature_raw)?; + + let before = *i; + let encoding = match parse::header_field(i, b"encoding", Ok) { + Ok(encoding) => Some(encoding.as_bstr()), + Err(_) => { + *i = before; + None + } + }; + + let mut extra_headers = Vec::new(); + loop { + let before = *i; + match parse::any_header_field_multi_line(i) + .map(|(k, v)| (k.as_bstr(), Cow::Owned(v))) + .or_else(|_| { + *i = before; + parse::any_header_field(i).map(|(k, v)| (k.as_bstr(), Cow::Borrowed(v.as_bstr()))) + }) { + Ok(header) => extra_headers.push(header), + Err(_) => { + *i = before; + break; + } + } + } + + let message = message(i)?; + if !i.is_empty() { + return 
Err(crate::decode::Error); + } + + Ok(CommitRef { + tree, + parents, + author, + committer, + encoding, + message, + extra_headers, + }) } diff --git a/gix-object/src/commit/message/decode.rs b/gix-object/src/commit/message/decode.rs index fddb3d4d6d8..3ef600a6112 100644 --- a/gix-object/src/commit/message/decode.rs +++ b/gix-object/src/commit/message/decode.rs @@ -1,49 +1,26 @@ -use winnow::{ - combinator::{alt, eof, preceded, terminated}, - error::ParserError, - prelude::*, - stream::{Offset, Stream}, - token::{rest, take_till}, -}; - use crate::bstr::{BStr, ByteSlice}; -pub(crate) fn newline<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<&'a [u8], E> { - alt((b"\n", b"\r\n")).parse_next(i) -} - -fn subject_and_body<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<(&'a BStr, Option<&'a BStr>), E> { - let start_i = *i; - let start = i.checkpoint(); - while !i.is_empty() { - match take_till::<_, _, E>(1.., |c| c == b'\n' || c == b'\r').parse_next(i) { - Ok(_) => { - let consumed_bytes = i.offset_from(&start); - match preceded((newline::, newline::), rest).parse_next(i) { - Ok(body) => { - let body = (!body.is_empty()).then(|| body.as_bstr()); - return Ok((start_i[0usize..consumed_bytes].as_bstr(), body)); - } - Err(_) => match i.next_token() { - Some(_) => {} - None => break, - }, - } +/// Returns title and body, without separator +pub fn message_title_and_body(input: &[u8]) -> (&BStr, Option<&BStr>) { + let mut pos = 0; + while pos < input.len() { + if let Some(first_len) = newline_len(&input[pos..]) { + if let Some(second_len) = newline_len(&input[pos + first_len..]) { + let body = &input[pos + first_len + second_len..]; + return (input[..pos].as_bstr(), (!body.is_empty()).then(|| body.as_bstr())); } - Err(_) => match i.next_token() { - Some(_) => {} - None => break, - }, } + pos += 1; } - - i.reset(&start); - rest.map(|r: &[u8]| (r.as_bstr(), None)).parse_next(i) + (input.as_bstr(), None) } -/// Returns title and body, without 
separator -pub fn message(mut input: &[u8]) -> (&BStr, Option<&BStr>) { - terminated(subject_and_body::<()>, eof) - .parse_next(&mut input) - .expect("cannot fail") +fn newline_len(input: &[u8]) -> Option { + if input.starts_with(b"\r\n") { + Some(2) + } else if input.starts_with(b"\n") { + Some(1) + } else { + None + } } diff --git a/gix-object/src/commit/message/mod.rs b/gix-object/src/commit/message/mod.rs index d79c3772923..986e1a2cb5e 100644 --- a/gix-object/src/commit/message/mod.rs +++ b/gix-object/src/commit/message/mod.rs @@ -60,7 +60,7 @@ impl<'a> MessageRef<'a> { /// /// Note that this cannot fail as everything will be interpreted as title if there is no body separator. pub fn from_bytes(input: &'a [u8]) -> Self { - let (title, body) = decode::message(input); + let (title, body) = decode::message_title_and_body(input); MessageRef { title, body } } diff --git a/gix-object/src/commit/mod.rs b/gix-object/src/commit/mod.rs index 6d0219d4250..6db9331acab 100644 --- a/gix-object/src/commit/mod.rs +++ b/gix-object/src/commit/mod.rs @@ -1,7 +1,6 @@ use std::ops::Range; use bstr::{BStr, BString, ByteSlice}; -use winnow::prelude::*; use crate::parse::parse_signature; use crate::{Commit, CommitRef, TagRef}; @@ -63,12 +62,13 @@ mod write; /// Lifecycle impl<'a> CommitRef<'a> { - /// Deserialize a commit from the given `data` bytes while avoiding most allocations. - pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { + /// Deserialize a commit from the given `data` bytes while avoiding most allocations, using `hash_kind` to know + /// what kind of hash to expect for validation. 
+ pub fn from_bytes(mut data: &'a [u8], hash_kind: gix_hash::Kind) -> Result, crate::decode::Error> { let input = &mut data; - match decode::commit.parse_next(input) { + match decode::commit(input, hash_kind) { Ok(tag) => Ok(tag), - Err(err) => Err(crate::decode::Error::with_err(err, input)), + Err(err) => Err(err), } } } @@ -89,7 +89,10 @@ impl<'a> CommitRef<'a> { /// Returns a convenient iterator over all extra headers. pub fn extra_headers(&self) -> ExtraHeaders> { - ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref()))) + ExtraHeaders::new( + self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref())), + self.tree().kind(), + ) } /// Return the author, with whitespace trimmed. @@ -133,13 +136,17 @@ impl CommitRef<'_> { impl Commit { /// Returns a convenient iterator over all extra headers. pub fn extra_headers(&self) -> ExtraHeaders> { - ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (k.as_bstr(), v.as_bstr()))) + ExtraHeaders::new( + self.extra_headers.iter().map(|(k, v)| (k.as_bstr(), v.as_bstr())), + self.tree.kind(), + ) } } /// An iterator over extra headers in [owned][crate::Commit] and [borrowed][crate::CommitRef] commits. pub struct ExtraHeaders { inner: I, + hash_kind: gix_hash::Kind, } /// Instantiation and convenience. @@ -148,8 +155,8 @@ where I: Iterator, { /// Create a new instance from an iterator over tuples of (name, value) pairs. - pub fn new(iter: I) -> Self { - ExtraHeaders { inner: iter } + pub fn new(iter: I, hash_kind: gix_hash::Kind) -> Self { + ExtraHeaders { inner: iter, hash_kind } } /// Find the _value_ of the _first_ header with the given `name`. @@ -176,7 +183,8 @@ where /// A merge tag is a tag object embedded within the respective header field of a commit, making /// it a child object of sorts. 
pub fn mergetags(self) -> impl Iterator, crate::decode::Error>> { - self.find_all("mergetag").map(|b| TagRef::from_bytes(b)) + let hash_kind = self.hash_kind; + self.find_all("mergetag").map(move |b| TagRef::from_bytes(b, hash_kind)) } /// Return the cryptographic signature provided by gpg/pgp verbatim. diff --git a/gix-object/src/commit/ref_iter.rs b/gix-object/src/commit/ref_iter.rs index 6a7eb2c778a..3b49f3f1c6b 100644 --- a/gix-object/src/commit/ref_iter.rs +++ b/gix-object/src/commit/ref_iter.rs @@ -2,18 +2,11 @@ use std::{borrow::Cow, ops::Range}; use bstr::BStr; use gix_hash::{oid, ObjectId}; -use winnow::{ - combinator::{alt, eof, opt, terminated}, - error::StrContext, - prelude::*, - token::take_till, -}; use crate::{ bstr::ByteSlice, commit::{decode, SignedData, SIGNATURE_FIELD_NAME}, - parse::{self, NL}, - CommitRefIter, + parse, CommitRefIter, }; #[derive(Copy, Clone)] @@ -37,11 +30,13 @@ pub(crate) enum State { /// Lifecycle impl<'a> CommitRefIter<'a> { - /// Create a commit iterator from data. - pub fn from_bytes(data: &'a [u8]) -> CommitRefIter<'a> { + /// Create a commit iterator from the given `data`, using `hash_kind` to know + /// what kind of hash to expect for validation. + pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> CommitRefIter<'a> { CommitRefIter { data, state: State::default(), + hash_kind, } } } @@ -49,17 +44,21 @@ impl<'a> CommitRefIter<'a> { /// Access impl<'a> CommitRefIter<'a> { /// Parse `data` as commit and return its PGP signature, along with *all non-signature* data as [`SignedData`], or `None` - /// if the commit isn't signed. + /// if the commit isn't signed. All hashes in `data` are parsed as `hash_kind`. /// /// This allows the caller to validate the signature by passing the signed data along with the signature back to the program /// that created it. 
- pub fn signature(data: &'a [u8]) -> Result, SignedData<'a>)>, crate::decode::Error> { + pub fn signature( + data: &'a [u8], + hash_kind: gix_hash::Kind, + ) -> Result, SignedData<'a>)>, crate::decode::Error> { let mut signature_and_range = None; let raw_tokens = CommitRefIterRaw { data, state: State::default(), offset: 0, + hash_kind, }; for token in raw_tokens { let token = token?; @@ -153,95 +152,91 @@ fn missing_field() -> crate::decode::Error { impl<'a> CommitRefIter<'a> { #[inline] - fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { + fn next_inner( + mut i: &'a [u8], + state: &mut State, + hash_kind: gix_hash::Kind, + ) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { let input = &mut i; - match Self::next_inner_(input, state) { + match Self::next_inner_(input, state, hash_kind) { Ok(token) => Ok((*input, token)), - Err(err) => Err(crate::decode::Error::with_err(err, input)), + Err(err) => Err(err), } } fn next_inner_( input: &mut &'a [u8], state: &mut State, - ) -> Result, winnow::error::ErrMode> { + hash_kind: gix_hash::Kind, + ) -> Result, crate::decode::Error> { use State::*; Ok(match state { Tree => { - let tree = (|i: &mut _| parse::header_field(i, b"tree", parse::hex_hash)) - .context(StrContext::Expected("tree <40 lowercase hex char>".into())) - .parse_next(input)?; + let tree = parse::header_field(input, b"tree", |value| parse::hex_hash(value, hash_kind))?; *state = State::Parents; Token::Tree { id: ObjectId::from_hex(tree).expect("parsing validation"), } } Parents => { - let parent = opt(|i: &mut _| parse::header_field(i, b"parent", parse::hex_hash)) - .context(StrContext::Expected("commit <40 lowercase hex char>".into())) - .parse_next(input)?; - match parent { - Some(parent) => Token::Parent { + if input.starts_with(b"parent ") { + let parent = parse::header_field(input, b"parent", |value| parse::hex_hash(value, hash_kind))?; + Token::Parent { id: 
ObjectId::from_hex(parent).expect("parsing validation"), - }, - None => { - *state = State::Signature { - of: SignatureKind::Author, - }; - Self::next_inner_(input, state)? } + } else { + *state = State::Signature { + of: SignatureKind::Author, + }; + Self::next_inner_(input, state, hash_kind)? } } Signature { ref mut of } => { let who = *of; - let (field_name, err_msg) = match of { + let field_name = match of { SignatureKind::Author => { *of = SignatureKind::Committer; - (&b"author"[..], "author ") + &b"author"[..] } SignatureKind::Committer => { *state = State::Encoding; - (&b"committer"[..], "committer ") + &b"committer"[..] } }; - let signature = (|i: &mut _| parse::header_field(i, field_name, parse::signature)) - .context(StrContext::Expected(err_msg.into())) - .parse_next(input)?; + let signature = parse::header_field(input, field_name, parse::signature)?; match who { SignatureKind::Author => Token::Author { signature }, SignatureKind::Committer => Token::Committer { signature }, } } Encoding => { - let encoding = opt(|i: &mut _| parse::header_field(i, b"encoding", take_till(0.., NL))) - .context(StrContext::Expected("encoding ".into())) - .parse_next(input)?; *state = State::ExtraHeaders; - match encoding { - Some(encoding) => Token::Encoding(encoding.as_bstr()), - None => Self::next_inner_(input, state)?, + if input.starts_with(b"encoding ") { + let encoding = parse::header_field(input, b"encoding", Ok)?; + Token::Encoding(encoding.as_bstr()) + } else { + Self::next_inner_(input, state, hash_kind)? 
} } ExtraHeaders => { - let extra_header = opt(alt(( - |i: &mut _| parse::any_header_field_multi_line(i).map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), - |i: &mut _| { - parse::any_header_field(i, take_till(0.., NL)) - .map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr()))) - }, - ))) - .context(StrContext::Expected(" ".into())) - .parse_next(input)?; - match extra_header { - Some(extra_header) => Token::ExtraHeader(extra_header), - None => { - *state = State::Message; - Self::next_inner_(input, state)? + if input.starts_with(b"\n") { + *state = State::Message; + Self::next_inner_(input, state, hash_kind)? + } else { + let before = *input; + match parse::any_header_field_multi_line(input) + .map(|(k, o)| (k.as_bstr(), Cow::Owned(o))) + .or_else(|_| { + *input = before; + parse::any_header_field(input).map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr()))) + }) { + Ok(extra_header) => Token::ExtraHeader(extra_header), + Err(err) => return Err(err), } } } Message => { - let message = terminated(decode::message, eof).parse_next(input)?; + let message = decode::message(input)?; debug_assert!( input.is_empty(), "we should have consumed all data - otherwise iter may go forever" @@ -259,7 +254,7 @@ impl<'a> Iterator for CommitRefIter<'a> { if self.data.is_empty() { return None; } - match Self::next_inner(self.data, &mut self.state) { + match Self::next_inner(self.data, &mut self.state, self.hash_kind) { Ok((data, token)) => { self.data = data; Some(Ok(token)) @@ -277,6 +272,7 @@ struct CommitRefIterRaw<'a> { data: &'a [u8], state: State, offset: usize, + hash_kind: gix_hash::Kind, } impl<'a> Iterator for CommitRefIterRaw<'a> { @@ -286,7 +282,7 @@ impl<'a> Iterator for CommitRefIterRaw<'a> { if self.data.is_empty() { return None; } - match CommitRefIter::next_inner(self.data, &mut self.state) { + match CommitRefIter::next_inner(self.data, &mut self.state, self.hash_kind) { Ok((remaining, token)) => { let consumed = self.data.len() - remaining.len(); let start = self.offset; 
diff --git a/gix-object/src/data.rs b/gix-object/src/data.rs index 1ced8a7f4de..1d16eb581ae 100644 --- a/gix-object/src/data.rs +++ b/gix-object/src/data.rs @@ -16,8 +16,8 @@ impl<'a> Data<'a> { Ok(match self.kind { Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data, self.hash_kind)?), Kind::Blob => ObjectRef::Blob(BlobRef { data: self.data }), - Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data)?), - Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data)?), + Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data, self.hash_kind)?), + Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data, self.hash_kind)?), }) } @@ -34,7 +34,7 @@ impl<'a> Data<'a> { /// `None` if this is not a commit object. pub fn try_into_commit_iter(self) -> Option> { match self.kind { - Kind::Commit => Some(CommitRefIter::from_bytes(self.data)), + Kind::Commit => Some(CommitRefIter::from_bytes(self.data, self.hash_kind)), _ => None, } } @@ -43,7 +43,7 @@ impl<'a> Data<'a> { /// `None` if this is not a tag object. pub fn try_into_tag_iter(self) -> Option> { match self.kind { - Kind::Tag => Some(TagRefIter::from_bytes(self.data)), + Kind::Tag => Some(TagRefIter::from_bytes(self.data, self.hash_kind)), _ => None, } } diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index bdfcc422769..5e153f67530 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -109,7 +109,7 @@ pub struct Blob { #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct CommitRef<'a> { - /// HEX hash of tree object we point to. Usually 40 bytes long. + /// HEX hash of tree object we point to. /// /// Use [`tree()`](CommitRef::tree()) to obtain a decoded version of it. 
#[cfg_attr(feature = "serde", serde(borrow))] @@ -140,6 +140,7 @@ pub struct CommitRef<'a> { pub struct CommitRefIter<'a> { data: &'a [u8], state: commit::ref_iter::State, + hash_kind: gix_hash::Kind, } /// A mutable git commit, representing an annotated state of a working tree along with a reference to its historical commits. @@ -194,6 +195,7 @@ pub struct TagRef<'a> { pub struct TagRefIter<'a> { data: &'a [u8], state: tag::ref_iter::State, + hash_kind: gix_hash::Kind, } /// A mutable git tag. @@ -308,78 +310,14 @@ pub struct Header { /// pub mod decode { - #[cfg(feature = "verbose-object-parsing-errors")] - mod _decode { - /// The type to be used for parse errors. - pub type ParseError = winnow::error::ContextError; - + mod error { pub(crate) fn empty_error() -> Error { - Error { - inner: winnow::error::ContextError::new(), - remaining: Default::default(), - } - } - - /// A type to indicate errors during parsing and to abstract away details related to `nom`. - #[derive(Debug, Clone)] - pub struct Error { - /// The actual error - pub inner: ParseError, - /// Where the error occurred - pub remaining: Vec, - } - - impl Error { - pub(crate) fn with_err(err: winnow::error::ErrMode, remaining: &[u8]) -> Self { - Self { - inner: err.into_inner().expect("we don't have streaming parsers"), - remaining: remaining.to_owned(), - } - } - } - - impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "object parsing failed at `{}`", bstr::BStr::new(&self.remaining))?; - if self.inner.context().next().is_some() { - writeln!(f)?; - self.inner.fmt(f)?; - } - Ok(()) - } + Error } - impl std::error::Error for Error { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - self.inner.cause().map(|v| v as &(dyn std::error::Error + 'static)) - } - } - } - - /// - #[cfg(not(feature = "verbose-object-parsing-errors"))] - mod _decode { - /// The type to be used for parse errors, discards everything and is zero size 
- pub type ParseError = (); - - pub(crate) fn empty_error() -> Error { - Error { inner: () } - } - - /// A type to indicate errors during parsing and to abstract away details related to `nom`. - #[derive(Debug, Clone)] - pub struct Error { - /// The actual error - pub inner: ParseError, - } - - impl Error { - pub(crate) fn with_err(err: winnow::error::ErrMode, _remaining: &[u8]) -> Self { - Self { - inner: err.into_inner().expect("we don't have streaming parsers"), - } - } - } + /// A type to indicate any error occurred during parsing. + #[derive(Debug, Clone, Copy, Default)] + pub struct Error; impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -389,8 +327,8 @@ pub mod decode { impl std::error::Error for Error {} } - pub(crate) use _decode::empty_error; - pub use _decode::{Error, ParseError}; + pub(crate) use error::empty_error; + pub use error::Error; /// Returned by [`loose_header()`] #[derive(Debug, thiserror::Error)] diff --git a/gix-object/src/object/mod.rs b/gix-object/src/object/mod.rs index 82751195743..2a15b08ef36 100644 --- a/gix-object/src/object/mod.rs +++ b/gix-object/src/object/mod.rs @@ -212,8 +212,8 @@ impl<'a> ObjectRef<'a> { Ok(match kind { Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data, hash_kind)?), Kind::Blob => ObjectRef::Blob(BlobRef { data }), - Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data)?), - Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data)?), + Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data, hash_kind)?), + Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data, hash_kind)?), }) } diff --git a/gix-object/src/parse.rs b/gix-object/src/parse.rs index 41e24ac6ee3..3e426bd8685 100644 --- a/gix-object/src/parse.rs +++ b/gix-object/src/parse.rs @@ -1,89 +1,152 @@ -use bstr::{BStr, BString, ByteVec}; -use winnow::{ - combinator::{preceded, repeat, terminated}, - error::{AddContext, ParserError, StrContext}, - prelude::*, - token::{take_till, 
take_until, take_while}, -}; - -use crate::ByteSlice; +use bstr::{BStr, BString, ByteSlice, ByteVec}; pub(crate) const NL: &[u8] = b"\n"; pub(crate) const SPACE: &[u8] = b" "; const SPACE_OR_NL: &[u8] = b" \n"; -pub(crate) fn any_header_field_multi_line<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - i: &mut &'a [u8], -) -> ModalResult<(&'a [u8], BString), E> { - ( - terminated(take_till(1.., SPACE_OR_NL), SPACE), - ( - take_till(0.., NL), - NL, - repeat(1.., terminated((SPACE, take_until(0.., NL)), NL)).map(|()| ()), - ) - .take() - .map(|o: &[u8]| { - let bytes = o.as_bstr(); - let mut out = BString::from(Vec::with_capacity(bytes.len())); - let mut lines = bytes.lines_with_terminator(); - out.push_str(lines.next().expect("first line")); - for line in lines { - out.push_str(&line[1..]); // cut leading space - } - out - }), - ) - .context(StrContext::Expected("name ".into())) - .parse_next(i) -} +/// The result type shared by object parsers. +pub(crate) type ParseResult = Result; -pub(crate) fn header_field<'a, T, E: ParserError<&'a [u8]>>( - i: &mut &'a [u8], - name: &'static [u8], - parse_value: impl ModalParser<&'a [u8], T, E>, -) -> ModalResult { - terminated(preceded(terminated(name, SPACE), parse_value), NL).parse_next(i) +/// Parse any multi-line object header field. +/// +/// Typical input is `gpgsig -----BEGIN...\n \nnext ...`, where the +/// field name is followed by a space, an initial value line, and at least one +/// continuation line starting with a space. +/// +/// The returned tuple contains the field name and the unfolded value, +/// with the leading space removed from each continuation line. +/// +/// On success, `i` is advanced to the first byte after the final continuation +/// line. 
+pub(crate) fn any_header_field_multi_line<'a>(i: &mut &'a [u8]) -> ParseResult<(&'a [u8], BString)> { + let mut c = *i; + let input = c; + let name_end = c + .find_byteset(SPACE_OR_NL) + .filter(|pos| *pos > 0) + .ok_or(crate::decode::Error)?; + if c.get(name_end) != Some(&b' ') { + return Err(crate::decode::Error); + } + + c = &c[name_end + 1..]; + let first_line_end = c.find_byte(b'\n').ok_or(crate::decode::Error)?; + c = &c[first_line_end + 1..]; + + let mut continuation_end = name_end + 1 + first_line_end + 1; + let mut continuation_count = 0usize; + while c.first() == Some(&b' ') { + let line_end = c.find_byte(b'\n').ok_or(crate::decode::Error)?; + continuation_end += line_end + 1; + c = &c[line_end + 1..]; + continuation_count += 1; + } + if continuation_count == 0 { + return Err(crate::decode::Error); + } + + let bytes = input[name_end + 1..continuation_end].as_bstr(); + let mut out = BString::from(Vec::with_capacity(bytes.len())); + let mut lines = bytes.lines_with_terminator(); + out.push_str(lines.next().expect("first line")); + for line in lines { + out.push_str(&line[1..]); + } + *i = &input[continuation_end..]; + Ok((input[..name_end].as_bstr(), out)) } -pub(crate) fn any_header_field<'a, T, E: ParserError<&'a [u8]>>( +/// Parse a specific single-line object header field. +/// +/// `name` is the header name without its trailing space, for example `b"tree"` +/// or `b"author"`. Typical input is ` \n...`. The value bytes, +/// excluding the header name, separator, and trailing newline, are passed to +/// `parse_value`. +/// +/// On success, `i` is advanced past the entire header line and the parsed value +/// is returned. 
+pub(crate) fn header_field<'a, T>( i: &mut &'a [u8], - parse_value: impl ModalParser<&'a [u8], T, E>, -) -> ModalResult<(&'a [u8], T), E> { - terminated((terminated(take_till(1.., SPACE_OR_NL), SPACE), parse_value), NL).parse_next(i) + name: &'static [u8], + parse_value: impl FnOnce(&'a [u8]) -> ParseResult, +) -> ParseResult { + let c = *i; + let Some(rest) = c.strip_prefix(name).and_then(|rest| rest.strip_prefix(SPACE)) else { + return Err(crate::decode::Error); + }; + let Some(nl) = rest.find_byte(b'\n') else { + return Err(crate::decode::Error); + }; + let value = parse_value(&rest[..nl])?; + *i = &rest[nl + 1..]; + Ok(value) } -fn is_hex_digit_lc(b: u8) -> bool { - matches!(b, b'0'..=b'9' | b'a'..=b'f') +/// Parse any single-line object header field and return its raw value. +/// +/// Typical input is ` \n...`. The returned tuple contains the +/// field name and the value bytes without the trailing newline. +/// +/// On success, `i` is advanced past the newline. +pub(crate) fn any_header_field<'a>(i: &mut &'a [u8]) -> ParseResult<(&'a [u8], &'a [u8])> { + let mut c = *i; + let input = c; + let name_end = c + .find_byteset(SPACE_OR_NL) + .filter(|pos| *pos > 0) + .ok_or(crate::decode::Error)?; + if c.get(name_end) != Some(&b' ') { + return Err(crate::decode::Error); + } + c = &c[name_end + 1..]; + if let Some(value_end) = c.find_byte(b'\n') { + let value = &c[..value_end]; + let rest = &c[value_end + 1..]; + *i = rest; + Ok((&input[..name_end], value)) + } else { + Err(crate::decode::Error) + } } -pub fn hex_hash<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<&'a BStr, E> { - take_while( - gix_hash::Kind::shortest().len_in_hex()..=gix_hash::Kind::longest().len_in_hex(), - is_hex_digit_lc, - ) - .verify(|hex: &&[u8]| gix_hash::Kind::all().iter().any(|hk| hk.len_in_hex() == hex.len())) - .map(ByteSlice::as_bstr) - .parse_next(i) +/// Parse a complete hexadecimal object id of the given `hash_kind`. 
+/// +/// Typical input is a 40-byte SHA-1 hex id or a 64-byte SHA-256 hex id, +/// depending on `hash_kind`. The entire input slice must be ASCII hex and +/// match the expected object hash length. +pub fn hex_hash(i: &[u8], hash_kind: gix_hash::Kind) -> ParseResult<&BStr> { + if i.len() != hash_kind.len_in_hex() || !i.iter().all(u8::is_ascii_hexdigit) { + return Err(crate::decode::Error); + } + Ok(i.as_bstr()) } -pub(crate) fn signature<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - i: &mut &'a [u8], -) -> ModalResult, E> { - gix_actor::signature::decode(i) +/// Parse a complete actor signature. +/// +/// Typical input is `Name 1700000000 +0000`. +/// The entire input slice must be consumed by +/// `gix_actor`'s signature parser; trailing bytes cause an error. +pub(crate) fn signature(mut i: &[u8]) -> ParseResult> { + let signature = gix_actor::SignatureRef::from_bytes_consuming(&mut i).map_err(|_| crate::decode::Error)?; + if i.is_empty() { + Ok(signature) + } else { + Err(crate::decode::Error) + } } -pub(crate) fn signature_and_consumed<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - i: &mut &'a [u8], -) -> ModalResult<(gix_actor::SignatureRef<'a>, &'a BStr), E> { - let original = *i; - gix_actor::signature::decode(i).map(|signature| { - let consumed = original.len() - i.len(); - (signature, original[..consumed].as_bstr()) - }) +/// Validate a complete actor signature and return its raw bytes. +/// +/// Typical input is `Name 1700000000 +0000`. On success, the +/// returned [`BStr`] borrows all of `i`. +pub(crate) fn signature_raw(i: &[u8]) -> ParseResult<&BStr> { + signature(i).map(|_| i.as_bstr()) } +/// Parse a complete actor signature from a [`BStr`]. +/// +/// This is a convenience wrapper around [`signature`] for callers that already +/// hold byte-string data. 
pub(crate) fn parse_signature(raw: &BStr) -> Result, crate::decode::Error> { - gix_actor::SignatureRef::from_bytes::(raw.as_ref()) - .map_err(|err| crate::decode::Error::with_err(err, raw.as_ref())) + signature(raw.as_ref()) } diff --git a/gix-object/src/tag/decode.rs b/gix-object/src/tag/decode.rs index ad7e6ea3a46..2bf67424a26 100644 --- a/gix-object/src/tag/decode.rs +++ b/gix-object/src/tag/decode.rs @@ -1,73 +1,165 @@ -use winnow::{ - combinator::{alt, delimited, eof, opt, preceded, terminated}, - error::{AddContext, ParserError, StrContext}, - prelude::*, - stream::AsChar, - token::{rest, take_until, take_while}, -}; - -use crate::{parse, parse::NL, BStr, ByteSlice, TagRef}; - -pub fn git_tag<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - i: &mut &'a [u8], -) -> ModalResult, E> { - ( - (|i: &mut _| parse::header_field(i, b"object", parse::hex_hash)) - .context(StrContext::Expected("object <40 lowercase hex char>".into())), - (|i: &mut _| parse::header_field(i, b"type", take_while(1.., AsChar::is_alpha))) - .verify_map(|kind| crate::Kind::from_bytes(kind).ok()) - .context(StrContext::Expected("type ".into())), - (|i: &mut _| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0]))) - .context(StrContext::Expected("tag ".into())), - opt(|i: &mut _| parse::header_field(i, b"tagger", parse::signature_and_consumed).map(|(_signature, raw)| raw)) - .context(StrContext::Expected("tagger ".into())), - terminated(message, eof), - ) - .map(|(target, kind, tag_version, tagger, (message, pgp_signature))| TagRef { - target, - name: tag_version.as_bstr(), - target_kind: kind, - message, - tagger, - pgp_signature, - }) - .parse_next(i) +use bstr::ByteSlice; + +use crate::{parse, parse::ParseResult, BStr, Kind, TagRef}; + +/// Parse a complete annotated tag object body. +/// +/// Typical input starts with `object \n`, followed by `type \n`, +/// `tag \n`, an optional `tagger \n`, and a message separated +/// from the headers by a blank line. 
On success, the returned [`TagRef`] borrows
+/// all fields from `i` and `i` is advanced to the empty suffix, and it expects
+/// to see an entire, fully consumable tag in `i` without any unconsumed content
+/// after parsing.
+///
+/// This parser is not transactional as a whole: if a later field fails, `i` may
+/// already have been advanced past earlier successfully parsed fields. Individual
+/// field parsers document their own cursor behaviour.
+pub fn git_tag<'a>(i: &mut &'a [u8], hash_kind: gix_hash::Kind) -> ParseResult<TagRef<'a>> {
+    let target = target(i, hash_kind)?;
+    let kind = kind(i)?;
+    let tag_version = name(i)?;
+    let tagger = tagger_raw(i)?;
+
+    let (message, pgp_signature) = message(i)?;
+    if !i.is_empty() {
+        return Err(crate::decode::Error);
+    }
+
+    Ok(TagRef {
+        target,
+        name: tag_version.as_bstr(),
+        target_kind: kind,
+        message,
+        tagger,
+        pgp_signature,
+    })
+}
+
+/// Parse the `object <hex-hash>\n` header and return the object id as bytes.
+///
+/// Typical input is `object 0123456789012345678901234567890123456789\n`.
+/// The hash must match `hash_kind`. Uppercase ASCII hex is also valid.
+/// On success, `i` is advanced past the entire header line.
+pub(crate) fn target<'a>(i: &mut &'a [u8], hash_kind: gix_hash::Kind) -> ParseResult<&'a BStr> {
+    parse::header_field(i, b"object", |value| parse::hex_hash(value, hash_kind))
+}
+
+/// Parse the `type <kind>\n` header and return the object kind.
+///
+/// Typical inputs are `type commit\n`, `type tree\n`, `type blob\n`, and
+/// `type tag\n`. On success, `i` is advanced past the entire header line.
+pub(crate) fn kind(i: &mut &[u8]) -> ParseResult<Kind> {
+    parse::header_field(i, b"type", |value| {
+        Kind::from_bytes(value).map_err(|_| crate::decode::Error)
+    })
+}
+
+/// Parse the `tag <name>\n` header and return the tag name.
+///
+/// A typical input is `tag v1.0.0\n`. The returned name excludes both the
+/// `tag ` prefix and the trailing newline, and must be non-empty. 
On success, +/// `i` is advanced past the entire header line. +pub(crate) fn name<'a>(i: &mut &'a [u8]) -> ParseResult<&'a BStr> { + parse::header_field(i, b"tag", |value| { + (!value.is_empty()).then(|| value.as_bstr()).ok_or(crate::decode::Error) + }) +} + +/// Parse an optional `tagger \n` header and return its raw signature. +/// +/// A typical input is `tagger Name 1700000000 +0000\n`. If +/// the `tagger ` prefix is absent, this returns `Ok(None)`. On success, it +/// returns the signature bytes without the prefix or newline and advances `i` +/// past the entire header line. +pub(crate) fn tagger_raw<'a>(i: &mut &'a [u8]) -> ParseResult> { + if !i.starts_with(b"tagger ") { + return Ok(None); + } + parse::header_field(i, b"tagger", |raw| { + let mut sig = raw; + gix_actor::SignatureRef::from_bytes_consuming(&mut sig).map_err(|_| crate::decode::Error)?; + Ok(raw.as_bstr()) + }) + .map(Some) +} + +/// Parse an optional `tagger \n` header and return the decoded signature. +/// +/// A typical input is `tagger Name 1700000000 +0000\n`. If +/// the `tagger ` prefix is absent, this returns `Ok(None)`. On success, it +/// returns the parsed [`gix_actor::SignatureRef`] and advances `i` past the +/// entire header line. +pub(crate) fn tagger<'a>(i: &mut &'a [u8]) -> ParseResult>> { + if !i.starts_with(b"tagger ") { + return Ok(None); + } + parse::header_field(i, b"tagger", |i| { + let mut sig = i; + let signature = gix_actor::SignatureRef::from_bytes_consuming(&mut sig).map_err(|_| crate::decode::Error)?; + Ok(signature) + }) + .map(Some) } -pub fn message<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<(&'a BStr, Option<&'a BStr>), E> { - const PGP_SIGNATURE_BEGIN: &[u8] = b"\n-----BEGIN PGP SIGNATURE-----"; - const PGP_SIGNATURE_END: &[u8] = b"-----END PGP SIGNATURE-----"; +/// Parse the tag message and its optional PGP signature block. +/// +/// Typical input starts with the blank-line separator before the message, for +/// example `\nrelease notes`. 
A signed input looks like +/// `\nrelease notes\n-----BEGIN PGP SIGNATURE-----\n...\n-----END PGP SIGNATURE-----`. +/// On success, `i` is always advanced to the empty suffix. The returned tuple +/// contains the message and, if a PGP signature marker is found at the +/// beginning of a line, all bytes from that marker to the end of the input, +/// and notably the end-of-signature marker isn't required. +/// +/// An input consisting only of newlines is accepted as an empty-header message +/// and consumed entirely. In that case, the newlines are returned as part of +/// the message to preserve roundtrips for tags whose body is only the +/// header/message separator. +pub fn message<'a>(i: &mut &'a [u8]) -> ParseResult<(&'a BStr, Option<&'a BStr>)> { + const PGP_SIGNATURE_BEGIN: &[u8] = b"-----BEGIN PGP SIGNATURE-----"; if i.iter().all(|b| *b == b'\n') { - return i.map(|message: &[u8]| (message.as_bstr(), None)).parse_next(i); + let message = i.as_bstr(); + *i = &[]; + return Ok((message, None)); + } + + let Some(rest) = i.strip_prefix(parse::NL) else { + return Err(crate::decode::Error); + }; + + *i = &[]; + if let Some(sig_start) = find_pgp_signature(rest, PGP_SIGNATURE_BEGIN) { + // Truncate newline off the message end. + let message_end = if sig_start > 0 && rest[sig_start - 1] == b'\n' { + sig_start - 1 + } else { + sig_start + }; + let message = rest[..message_end].as_bstr(); + let signature = &rest[sig_start..]; + return Ok((message, (!signature.is_empty()).then(|| signature.as_bstr()))); + } + + Ok((rest.as_bstr(), None)) +} + +/// Find a PGP signature marker that starts at a line boundary. +/// +/// `haystack` is usually the tag message body and `needle` is the marker to +/// search for. On success, the returned index is the marker itself. 
+fn find_pgp_signature(haystack: &[u8], needle: &[u8]) -> Option { + if haystack.starts_with(needle) { + return Some(0); + } + + let mut offset = 0; + while let Some(pos) = haystack.get(offset..)?.find_byte(b'\n') { + let found = offset + pos + 1; + if haystack[found..].starts_with(needle) { + return Some(found); + } + offset = found; } - delimited( - NL, - alt(( - ( - take_until(0.., PGP_SIGNATURE_BEGIN), - preceded( - NL, - ( - &PGP_SIGNATURE_BEGIN[1..], - take_until(0.., PGP_SIGNATURE_END), - PGP_SIGNATURE_END, - rest, - ) - .take() - .map(|signature: &[u8]| { - if signature.is_empty() { - None - } else { - Some(signature.as_bstr()) - } - }), - ), - ), - rest.map(|rest: &[u8]| (rest, None)), - )), - opt(NL), - ) - .map(|(message, signature)| (message.as_bstr(), signature)) - .parse_next(i) + None } diff --git a/gix-object/src/tag/mod.rs b/gix-object/src/tag/mod.rs index 3ae20f2deaa..62242210aa2 100644 --- a/gix-object/src/tag/mod.rs +++ b/gix-object/src/tag/mod.rs @@ -1,6 +1,5 @@ use crate::parse::parse_signature; use crate::TagRef; -use winnow::prelude::*; mod decode; @@ -12,11 +11,11 @@ pub mod ref_iter; impl<'a> TagRef<'a> { /// Deserialize a tag from `data`. - pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { + pub fn from_bytes(mut data: &'a [u8], hash_kind: gix_hash::Kind) -> Result, crate::decode::Error> { let input = &mut data; - match decode::git_tag.parse_next(input) { + match decode::git_tag(input, hash_kind) { Ok(tag) => Ok(tag), - Err(err) => Err(crate::decode::Error::with_err(err, input)), + Err(err) => Err(err), } } /// The object this tag points to as `Id`. 
diff --git a/gix-object/src/tag/ref_iter.rs b/gix-object/src/tag/ref_iter.rs index 11a882ce358..199f4cc8225 100644 --- a/gix-object/src/tag/ref_iter.rs +++ b/gix-object/src/tag/ref_iter.rs @@ -1,14 +1,7 @@ use bstr::BStr; use gix_hash::{oid, ObjectId}; -use winnow::{ - combinator::{eof, opt, terminated}, - error::{ParserError, StrContext}, - prelude::*, - stream::AsChar, - token::take_while, -}; -use crate::{bstr::ByteSlice, parse, parse::NL, tag::decode, Kind, TagRefIter}; +use crate::{bstr::ByteSlice, tag::decode, Kind, TagRefIter}; #[derive(Default, Copy, Clone)] pub(crate) enum State { @@ -21,11 +14,12 @@ pub(crate) enum State { } impl<'a> TagRefIter<'a> { - /// Create a tag iterator from data. - pub fn from_bytes(data: &'a [u8]) -> TagRefIter<'a> { + /// Create a tag iterator from `data`, parsing hashes as `hash_kind`. + pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> TagRefIter<'a> { TagRefIter { data, state: State::default(), + hash_kind, } } @@ -59,53 +53,49 @@ fn missing_field() -> crate::decode::Error { impl<'a> TagRefIter<'a> { #[inline] - fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { + fn next_inner( + mut i: &'a [u8], + state: &mut State, + hash_kind: gix_hash::Kind, + ) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { let input = &mut i; - match Self::next_inner_(input, state) { + match Self::next_inner_(input, state, hash_kind) { Ok(token) => Ok((*input, token)), - Err(err) => Err(crate::decode::Error::with_err(err, input)), + Err(err) => Err(err), } } fn next_inner_( input: &mut &'a [u8], state: &mut State, - ) -> Result, winnow::error::ErrMode> { + hash_kind: gix_hash::Kind, + ) -> Result, crate::decode::Error> { use State::*; Ok(match state { Target => { - let target = (|i: &mut _| parse::header_field(i, b"object", parse::hex_hash)) - .context(StrContext::Expected("object <40 lowercase hex char>".into())) - .parse_next(input)?; + let target = decode::target(input, 
hash_kind)?; *state = TargetKind; Token::Target { id: ObjectId::from_hex(target).expect("parsing validation"), } } TargetKind => { - let kind = (|i: &mut _| parse::header_field(i, b"type", take_while(1.., AsChar::is_alpha))) - .context(StrContext::Expected("type ".into())) - .parse_next(input)?; - let kind = Kind::from_bytes(kind).map_err(|_| winnow::error::ErrMode::from_input(input))?; + let kind = decode::kind(input)?; *state = Name; Token::TargetKind(kind) } Name => { - let tag_version = (|i: &mut _| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0]))) - .context(StrContext::Expected("tag ".into())) - .parse_next(input)?; + let tag_version = decode::name(input)?; *state = Tagger; Token::Name(tag_version.as_bstr()) } Tagger => { - let signature = opt(|i: &mut _| parse::header_field(i, b"tagger", parse::signature)) - .context(StrContext::Expected("tagger ".into())) - .parse_next(input)?; *state = Message; + let signature = decode::tagger(input)?; Token::Tagger(signature) } Message => { - let (message, pgp_signature) = terminated(decode::message, eof).parse_next(input)?; + let (message, pgp_signature) = decode::message(input)?; debug_assert!( input.is_empty(), "we should have consumed all data - otherwise iter may go forever" @@ -123,7 +113,7 @@ impl<'a> Iterator for TagRefIter<'a> { if self.data.is_empty() { return None; } - match Self::next_inner(self.data, &mut self.state) { + match Self::next_inner(self.data, &mut self.state, self.hash_kind) { Ok((data, token)) => { self.data = data; Some(Ok(token)) diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs index 8d6050614b2..14128ae110a 100644 --- a/gix-object/src/tree/ref_iter.rs +++ b/gix-object/src/tree/ref_iter.rs @@ -1,7 +1,6 @@ use std::ops::ControlFlow; use bstr::BStr; -use winnow::error::ParserError; use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter}; @@ -192,13 +191,8 @@ impl<'a> Iterator for TreeRefIter<'a> { Some(Ok(entry)) } None => { - let failing = self.data; 
self.data = &[]; - #[allow(clippy::unit_arg)] - Some(Err(crate::decode::Error::with_err( - winnow::error::ErrMode::from_input(&failing), - failing, - ))) + Some(Err(crate::decode::Error)) } } } @@ -214,7 +208,6 @@ impl<'a> TryFrom<&'a [u8]> for tree::EntryMode { mod decode { use bstr::ByteSlice; - use winnow::error::ParserError; use crate::{tree, tree::EntryRef, TreeRef}; @@ -253,11 +246,7 @@ mod decode { while !i.is_empty() { let Some((rest, entry)) = fast_entry(i, hash_len) else { - #[allow(clippy::unit_arg)] - return Err(crate::decode::Error::with_err( - winnow::error::ErrMode::from_input(&i), - i, - )); + return Err(crate::decode::Error); }; i = rest; out.push(entry); diff --git a/gix-object/tests/object/commit/from_bytes.rs b/gix-object/tests/object/commit/from_bytes.rs index 40f6cc478af..42e1a170792 100644 --- a/gix-object/tests/object/commit/from_bytes.rs +++ b/gix-object/tests/object/commit/from_bytes.rs @@ -1,16 +1,18 @@ use gix_actor::SignatureRef; -use gix_object::{bstr::ByteSlice, commit::message::body::TrailerRef, CommitRef, WriteTo}; +use gix_object::{ + bstr::ByteSlice, commit::message::body::TrailerRef, commit::ref_iter::Token, CommitRef, CommitRefIter, WriteTo, +}; use smallvec::SmallVec; use crate::{ commit::{LONG_MESSAGE, MERGE_TAG, SIGNATURE}, - fixture_name, hex_to_id, linus_signature, + fixture_name, fixture_oid, hex_to_id, linus_signature, }; #[test] fn invalid_timestsamp() { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "invalid-timestamp.txt")) + CommitRef::from_bytes(&fixture_name("commit", "invalid-timestamp.txt"), gix_hash::Kind::Sha1) .expect("auto-correct invalid timestamp by discarding it (time is still valid UTC)"), CommitRef { tree: b"7989dfb2ec2f41914611a22fb30bbc2b3849df9a".as_bstr(), @@ -25,6 +27,82 @@ fn invalid_timestsamp() { ); } +#[test] +fn sha256_with_all_fields_and_signature() -> crate::Result { + let input = b"tree 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef +parent 
1111111111111111111111111111111111111111111111111111111111111111 +parent 2222222222222222222222222222222222222222222222222222222222222222 +author Ada Lovelace 1710000000 +0000 +committer Grace Hopper 1710003600 -0230 +encoding ISO-8859-1 +gpgsig -----BEGIN SSH SIGNATURE----- + U1NIU0lHAAAAAQAAADMAAAALc3NoLWVkMjU1MTkAAAAgZXhhbXBsZS1zaGEyNTY= + -----END SSH SIGNATURE----- +mergetag object 3333333333333333333333333333333333333333333333333333333333333333 + type commit + tag nested-sha256 + tagger Release Bot 1710007200 +0530 +\x20 +nested release notes + -----BEGIN PGP SIGNATURE----- + nested-signature + -----END PGP SIGNATURE----- + +sha256 subject + +sha256 body +"; + let commit = CommitRef::from_bytes(input, gix_hash::Kind::Sha256)?; + assert_eq!( + commit.tree, + b"0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef".as_bstr() + ); + assert_eq!(commit.parents.len(), 2); + assert_eq!(commit.encoding, Some(b"ISO-8859-1".as_bstr())); + assert_eq!(commit.author()?.name, b"Ada Lovelace".as_bstr()); + assert_eq!(commit.committer()?.email, b"grace@example.com".as_bstr()); + assert_eq!( + commit.extra_headers().pgp_signature(), + Some( + b"-----BEGIN SSH SIGNATURE----- +U1NIU0lHAAAAAQAAADMAAAALc3NoLWVkMjU1MTkAAAAgZXhhbXBsZS1zaGEyNTY= +-----END SSH SIGNATURE----- +" + .as_bstr() + ) + ); + assert_eq!(commit.extra_headers().mergetags().count(), 1); + assert_eq!(commit.message, b"sha256 subject\n\nsha256 body\n".as_bstr()); + + let tokens = CommitRefIter::from_bytes(input, gix_hash::Kind::Sha256).collect::, _>>()?; + assert!(matches!(tokens[0], Token::Tree { ref id } if id.kind() == gix_hash::Kind::Sha256)); + assert_eq!( + tokens + .iter() + .filter(|token| matches!(token, Token::Parent { .. 
})) + .count(), + 2 + ); + assert_eq!( + tokens.last(), + Some(&Token::Message(b"sha256 subject\n\nsha256 body\n".as_bstr())) + ); + Ok(()) +} + +#[test] +fn uppercase_tree_id() -> crate::Result { + let input = b"tree 7989DFB2EC2F41914611A22FB30BBC2B3849DF9A +author Name 1312735823 +0518 +committer Name 1312735823 +0518 + +message"; + let commit = CommitRef::from_bytes(input, gix_hash::Kind::Sha1)?; + assert_eq!(commit.tree, b"7989DFB2EC2F41914611A22FB30BBC2B3849DF9A".as_bstr()); + assert_eq!(commit.tree(), hex_to_id("7989dfb2ec2f41914611a22fb30bbc2b3849df9a")); + Ok(()) +} + #[test] fn invalid_email_of_committer() -> crate::Result { let actor = gix_actor::SignatureRef { @@ -35,7 +113,7 @@ fn invalid_email_of_committer() -> crate::Result { let mut buf = vec![]; let backing = fixture_name("commit", "invalid-actor.txt"); - let commit = CommitRef::from_bytes(&backing).expect("ignore strangely formed actor format"); + let commit = CommitRef::from_bytes(&backing, gix_hash::Kind::Sha1).expect("ignore strangely formed actor format"); assert_eq!( commit, CommitRef { @@ -53,7 +131,7 @@ fn invalid_email_of_committer() -> crate::Result { commit.write_to(&mut buf).expect("we can write invalid actors back"); assert_eq!( - CommitRef::from_bytes(&buf).expect("this is the same commit and it can be parsed"), + CommitRef::from_bytes(&buf, gix_hash::Kind::Sha1).expect("this is the same commit and it can be parsed"), commit, "round-tripping works" ); @@ -63,10 +141,11 @@ fn invalid_email_of_committer() -> crate::Result { #[test] fn unsigned() -> crate::Result { + let tree = fixture_oid_hex("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d"); assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "unsigned.txt"))?, + CommitRef::from_bytes(&commit_fixture("unsigned.txt")?, crate::fixture_hash_kind())?, CommitRef { - tree: b"1b2dfb4ac5e42080b682fc676e9738c94ce6d54d".as_bstr(), + tree: tree.as_bytes().as_bstr(), parents: SmallVec::default(), author: b"Sebastian Thiel 1592437401 
+0800".as_bstr(), committer: b"Sebastian Thiel 1592437401 +0800".as_bstr(), @@ -80,11 +159,13 @@ fn unsigned() -> crate::Result { #[test] fn whitespace() -> crate::Result { + let tree = fixture_oid_hex("9bed6275068a0575243ba8409253e61af81ab2ff"); + let parent = fixture_oid_hex("26b4df046d1776c123ac69d918f5aec247b58cc6"); assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "whitespace.txt"))?, + CommitRef::from_bytes(&commit_fixture("whitespace.txt")?, crate::fixture_hash_kind())?, CommitRef { - tree: b"9bed6275068a0575243ba8409253e61af81ab2ff".as_bstr(), - parents: SmallVec::from(vec![b"26b4df046d1776c123ac69d918f5aec247b58cc6".as_bstr()]), + tree: tree.as_bytes().as_bstr(), + parents: SmallVec::from(vec![parent.as_bytes().as_bstr()]), author: b"Sebastian Thiel 1592448450 +0800".as_bstr(), committer: b"Sebastian Thiel 1592448450 +0800".as_bstr(), encoding: None, @@ -97,11 +178,13 @@ fn whitespace() -> crate::Result { #[test] fn signed_singleline() -> crate::Result { + let tree = fixture_oid_hex("00fc39317701176e326974ce44f5bd545a32ec0b"); + let parent = fixture_oid_hex("09d8d3a12e161a7f6afb522dbe8900a9c09bce06"); assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "signed-singleline.txt"))?, + CommitRef::from_bytes(&commit_fixture("signed-singleline.txt")?, crate::fixture_hash_kind())?, CommitRef { - tree: b"00fc39317701176e326974ce44f5bd545a32ec0b".as_bstr(), - parents: SmallVec::from(vec![b"09d8d3a12e161a7f6afb522dbe8900a9c09bce06".as_bstr()]), + tree: tree.as_bytes().as_bstr(), + parents: SmallVec::from(vec![parent.as_bytes().as_bstr()]), author: b"Sebastian Thiel 1592391367 +0800".as_bstr(), committer: b"Sebastian Thiel 1592391367 +0800".as_bstr(), encoding: None, @@ -114,13 +197,13 @@ fn signed_singleline() -> crate::Result { #[test] fn mergetag() -> crate::Result { - let fixture = fixture_name("commit", "mergetag.txt"); + let fixture = commit_fixture("mergetag.txt")?; + let tree = fixture_oid_hex("1c61918031bf2c7fab9e17dde3c52a6a9884fcb5"); + 
let parent_a = fixture_oid_hex("44ebe016df3aad96e3be8f95ec52397728dd7701"); + let parent_b = fixture_oid_hex("8d485da0ddee79d0e6713405694253d401e41b93"); let expected = CommitRef { - tree: b"1c61918031bf2c7fab9e17dde3c52a6a9884fcb5".as_bstr(), - parents: SmallVec::from(vec![ - b"44ebe016df3aad96e3be8f95ec52397728dd7701".as_bstr(), - b"8d485da0ddee79d0e6713405694253d401e41b93".as_bstr(), - ]), + tree: tree.as_bytes().as_bstr(), + parents: SmallVec::from(vec![parent_a.as_bytes().as_bstr(), parent_b.as_bytes().as_bstr()]), author: b"Linus Torvalds 1591996221 -0700".as_bstr(), committer: b"Linus Torvalds 1591996221 -0700".as_bstr(), encoding: None, @@ -130,7 +213,7 @@ fn mergetag() -> crate::Result { std::borrow::Cow::Owned(MERGE_TAG.as_bytes().into()), )], }; - let commit = CommitRef::from_bytes(&fixture)?; + let commit = CommitRef::from_bytes(&fixture, crate::fixture_hash_kind())?; assert_eq!(commit, expected); assert_eq!(commit.extra_headers().find_all("mergetag").count(), 1); assert_eq!(commit.extra_headers().mergetags().count(), 1); @@ -141,11 +224,13 @@ fn mergetag() -> crate::Result { #[test] fn signed() -> crate::Result { + let tree = fixture_oid_hex("00fc39317701176e326974ce44f5bd545a32ec0b"); + let parent = fixture_oid_hex("09d8d3a12e161a7f6afb522dbe8900a9c09bce06"); assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "signed.txt"))?, + CommitRef::from_bytes(&commit_fixture("signed.txt")?, crate::fixture_hash_kind())?, CommitRef { - tree: b"00fc39317701176e326974ce44f5bd545a32ec0b".as_bstr(), - parents: SmallVec::from(vec![b"09d8d3a12e161a7f6afb522dbe8900a9c09bce06".as_bstr()]), + tree: tree.as_bytes().as_bstr(), + parents: SmallVec::from(vec![parent.as_bytes().as_bstr()]), author: b"Sebastian Thiel 1592391367 +0800".as_bstr(), committer: b"Sebastian Thiel 1592391367 +0800".as_bstr(), encoding: None, @@ -158,11 +243,13 @@ fn signed() -> crate::Result { #[test] fn signed_with_encoding() -> crate::Result { + let tree = 
fixture_oid_hex("1973afa74d87b2bb73fa884aaaa8752aec43ea88"); + let parent = fixture_oid_hex("79c51cc86923e2b8ca0ee5c4eb75e48027133f9a"); assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "signed-with-encoding.txt"))?, + CommitRef::from_bytes(&commit_fixture("signed-with-encoding.txt")?, crate::fixture_hash_kind())?, CommitRef { - tree: b"1973afa74d87b2bb73fa884aaaa8752aec43ea88".as_bstr(), - parents: SmallVec::from(vec![b"79c51cc86923e2b8ca0ee5c4eb75e48027133f9a".as_bstr()]), + tree: tree.as_bytes().as_bstr(), + parents: SmallVec::from(vec![parent.as_bytes().as_bstr()]), author: b"Sebastian Thiel 1592448995 +0800".as_bstr(), committer: b"Sebastian Thiel 1592449083 +0800".as_bstr(), encoding: Some(b"ISO-8859-1".as_bstr()), @@ -175,11 +262,13 @@ fn signed_with_encoding() -> crate::Result { #[test] fn with_encoding() -> crate::Result { + let tree = fixture_oid_hex("4a1c03029e7407c0afe9fc0320b3258e188b115e"); + let parent = fixture_oid_hex("7ca98aad461a5c302cb4c9e3acaaa6053cc67a62"); assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "with-encoding.txt"))?, + CommitRef::from_bytes(&commit_fixture("with-encoding.txt")?, crate::fixture_hash_kind())?, CommitRef { - tree: b"4a1c03029e7407c0afe9fc0320b3258e188b115e".as_bstr(), - parents: SmallVec::from(vec![b"7ca98aad461a5c302cb4c9e3acaaa6053cc67a62".as_bstr()]), + tree: tree.as_bytes().as_bstr(), + parents: SmallVec::from(vec![parent.as_bytes().as_bstr()]), author: b"Sebastian Thiel 1592438199 +0800".as_bstr(), committer: b"Sebastian Thiel 1592438199 +0800".as_bstr(), encoding: Some("ISO-8859-1".into()), @@ -192,10 +281,11 @@ fn with_encoding() -> crate::Result { #[test] fn pre_epoch() -> crate::Result { + let tree = fixture_oid_hex("71cdd4015386b764b178005cad4c88966bc9d61a"); assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "pre-epoch.txt"))?, + CommitRef::from_bytes(&commit_fixture("pre-epoch.txt")?, crate::fixture_hash_kind())?, CommitRef { - tree: 
b"71cdd4015386b764b178005cad4c88966bc9d61a".as_bstr(), + tree: tree.as_bytes().as_bstr(), parents: SmallVec::default(), author: "Législateur <> -5263834140 +0009".as_bytes().as_bstr(), committer: "Législateur <> -5263834140 +0009".as_bytes().as_bstr(), @@ -210,7 +300,10 @@ fn pre_epoch() -> crate::Result { #[test] fn double_dash_special_time_offset() -> crate::Result { assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "double-dash-date-offset.txt"))?, + CommitRef::from_bytes( + &fixture_name("commit", "double-dash-date-offset.txt"), + gix_hash::Kind::Sha1 + )?, CommitRef { tree: b"0a851d7a2a66084ab10516c406a405d147e974ad".as_bstr(), parents: SmallVec::from(vec![b"31350f4f0f459485eff2131517e3450cf251f6fa".as_bstr()]), @@ -231,13 +324,15 @@ fn with_trailer() -> crate::Result { email: "kim@eagain.st".into(), time: "1631514803 +0200", }; - let backing = fixture_name("commit", "message-with-footer.txt"); - let commit = CommitRef::from_bytes(&backing)?; + let backing = commit_fixture("message-with-footer.txt")?; + let tree = fixture_oid_hex("25a19c29c5e36884c1ad85d8faf23f1246b7961b"); + let parent = fixture_oid_hex("699ae71105dddfcbb9711ed3a92df09e91a04e90"); + let commit = CommitRef::from_bytes(&backing, crate::fixture_hash_kind())?; assert_eq!( commit, CommitRef { - tree: b"25a19c29c5e36884c1ad85d8faf23f1246b7961b".as_bstr(), - parents: SmallVec::from(vec![b"699ae71105dddfcbb9711ed3a92df09e91a04e90".as_bstr()]), + tree: tree.as_bytes().as_bstr(), + parents: SmallVec::from(vec![parent.as_bytes().as_bstr()]), author: "Kim Altintop 1631514803 +0200".as_bytes().as_bstr(), committer: "Kim Altintop 1631514803 +0200".as_bytes().as_bstr(), encoding: None, @@ -312,14 +407,14 @@ instead of depending directly on the lower-level crates. 
#[test] fn merge() -> crate::Result { + let tree = fixture_oid_hex("0cf16ce8e229b59a761198975f0c0263229faf82"); + let parent_a = fixture_oid_hex("6a6054db4ce3c1e4e6a37f8c4d7acb63a4d6ad71"); + let parent_b = fixture_oid_hex("c91d592913d47ac4e4a76daf16fd649b276e211e"); assert_eq!( - CommitRef::from_bytes(&fixture_name("commit", "merge.txt"))?, + CommitRef::from_bytes(&commit_fixture("merge.txt")?, crate::fixture_hash_kind())?, CommitRef { - tree: b"0cf16ce8e229b59a761198975f0c0263229faf82".as_bstr(), - parents: SmallVec::from(vec![ - b"6a6054db4ce3c1e4e6a37f8c4d7acb63a4d6ad71".as_bstr(), - b"c91d592913d47ac4e4a76daf16fd649b276e211e".as_bstr() - ]), + tree: tree.as_bytes().as_bstr(), + parents: SmallVec::from(vec![parent_a.as_bytes().as_bstr(), parent_b.as_bytes().as_bstr()]), author: b"Sebastian Thiel 1592454703 +0800".as_bstr(), committer: b"Sebastian Thiel 1592454738 +0800".as_bstr(), encoding: Some("ISO-8859-1".into()), @@ -332,8 +427,8 @@ fn merge() -> crate::Result { #[test] fn newline_right_after_signature_multiline_header() -> crate::Result { - let fixture = fixture_name("commit", "signed-whitespace.txt"); - let commit = CommitRef::from_bytes(&fixture)?; + let fixture = commit_fixture("signed-whitespace.txt")?; + let commit = CommitRef::from_bytes(&fixture, crate::fixture_hash_kind())?; let pgp_sig = crate::commit::OTHER_SIGNATURE.as_bstr(); assert_eq!(commit.extra_headers[0].1.as_ref(), pgp_sig); assert_eq!(commit.extra_headers().pgp_signature(), Some(pgp_sig)); @@ -349,8 +444,8 @@ fn newline_right_after_signature_multiline_header() -> crate::Result { #[test] fn bogus_multi_gpgsig_header() -> crate::Result { - let fixture = fixture_name("commit", "bogus-gpgsig-lines-in-git.git.txt"); - let commit = CommitRef::from_bytes(&fixture)?; + let fixture = commit_fixture("bogus-gpgsig-lines-in-git.git.txt")?; + let commit = CommitRef::from_bytes(&fixture, crate::fixture_hash_kind())?; let pgp_sig = b"-----BEGIN PGP SIGNATURE-----".as_bstr(); 
assert_eq!(commit.extra_headers().pgp_signature(), Some(pgp_sig)); assert_eq!( @@ -362,11 +457,17 @@ fn bogus_multi_gpgsig_header() -> crate::Result { let mut buf = Vec::::new(); commit.write_to(&mut buf)?; - let actual = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Commit, &buf)?; - assert_eq!( - actual, - hex_to_id("5f549aa2f78314ac37bbd436c8f80aea4c752e07"), - "round-tripping works despite the strangeness" - ); + let hash_kind = crate::fixture_hash_kind(); + let expected = gix_object::compute_hash(hash_kind, gix_object::Kind::Commit, &fixture)?; + let actual = gix_object::compute_hash(hash_kind, gix_object::Kind::Commit, &buf)?; + assert_eq!(actual, expected, "round-tripping works despite the strangeness"); Ok(()) } + +fn commit_fixture(path: &str) -> crate::Result> { + crate::object_fixture(&format!("commit/{path}")) +} + +fn fixture_oid_hex(hex: &str) -> String { + fixture_oid(hex).to_hex().to_string() +} diff --git a/gix-object/tests/object/commit/iter.rs b/gix-object/tests/object/commit/iter.rs index 6991ba14472..7ce34bc13a9 100644 --- a/gix-object/tests/object/commit/iter.rs +++ b/gix-object/tests/object/commit/iter.rs @@ -8,7 +8,7 @@ use crate::{ #[test] fn newline_right_after_signature_multiline_header() -> crate::Result { let data = fixture_name("commit", "signed-whitespace.txt"); - let tokens = CommitRefIter::from_bytes(&data).collect::, _>>()?; + let tokens = CommitRefIter::from_bytes(&data, gix_hash::Kind::Sha1).collect::, _>>()?; assert_eq!(tokens.len(), 7, "mainly a parsing exercise"); match tokens.last().expect("there are tokens") { Token::Message(msg) => { @@ -22,7 +22,7 @@ fn newline_right_after_signature_multiline_header() -> crate::Result { #[test] fn signed_with_encoding() -> crate::Result { let input = fixture_name("commit", "signed-with-encoding.txt"); - let iter = CommitRefIter::from_bytes(&input); + let iter = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1); assert_eq!( iter.collect::, _>>()?, vec![ @@ -52,7 
+52,8 @@ fn signed_with_encoding() -> crate::Result { #[test] fn whitespace() -> crate::Result { assert_eq!( - CommitRefIter::from_bytes(&fixture_name("commit", "whitespace.txt")).collect::, _>>()?, + CommitRefIter::from_bytes(&fixture_name("commit", "whitespace.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Tree { id: hex_to_id("9bed6275068a0575243ba8409253e61af81ab2ff") @@ -75,7 +76,8 @@ fn whitespace() -> crate::Result { #[test] fn unsigned() -> crate::Result { assert_eq!( - CommitRefIter::from_bytes(&fixture_name("commit", "unsigned.txt")).collect::, _>>()?, + CommitRefIter::from_bytes(&fixture_name("commit", "unsigned.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Tree { id: hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d") @@ -95,7 +97,8 @@ fn unsigned() -> crate::Result { #[test] fn signed_singleline() -> crate::Result { assert_eq!( - CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt")).collect::, _>>()?, + CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Tree { id: hex_to_id("00fc39317701176e326974ce44f5bd545a32ec0b") @@ -114,7 +117,7 @@ fn signed_singleline() -> crate::Result { ] ); assert_eq!( - CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt")) + CommitRefIter::from_bytes(&fixture_name("commit", "signed-singleline.txt"), gix_hash::Kind::Sha1) .parent_ids() .collect::>(), vec![hex_to_id("09d8d3a12e161a7f6afb522dbe8900a9c09bce06")] @@ -125,7 +128,7 @@ fn signed_singleline() -> crate::Result { #[test] fn error_handling() -> crate::Result { let data = fixture_name("commit", "unsigned.txt"); - let iter = CommitRefIter::from_bytes(&data[..data.len() / 2]); + let iter = CommitRefIter::from_bytes(&data[..data.len() / 2], gix_hash::Kind::Sha1); let tokens = iter.collect::>(); assert!( tokens.last().expect("at least the errored token").is_err(), @@ -137,7 +140,7 @@ fn error_handling() -> 
crate::Result { #[test] fn mergetag() -> crate::Result { let input = fixture_name("commit", "mergetag.txt"); - let iter = CommitRefIter::from_bytes(&input); + let iter = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1); assert_eq!( iter.collect::, _>>()?, vec![ @@ -179,7 +182,7 @@ mod method { #[test] fn tree_id() -> crate::Result { let input = fixture_name("commit", "unsigned.txt"); - let iter = CommitRefIter::from_bytes(&input); + let iter = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1); assert_eq!( iter.clone().tree_id().ok(), Some(hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d")) @@ -195,7 +198,7 @@ mod method { #[test] fn signatures() -> crate::Result { let input = fixture_name("commit", "unsigned.txt"); - let iter = CommitRefIter::from_bytes(&input); + let iter = CommitRefIter::from_bytes(&input, gix_hash::Kind::Sha1); assert_eq!( iter.signatures().collect::>(), vec![signature("1592437401 +0800"), signature("1592437401 +0800")] @@ -227,7 +230,8 @@ mod method { let expected_signature = expected_signature.into(); let fixture_data = fixture_name("commit", fixture); - let (actual_signature, actual_signed_data) = CommitRefIter::signature(&fixture_data)?.expect("sig present"); + let (actual_signature, actual_signed_data) = + CommitRefIter::signature(&fixture_data, gix_hash::Kind::Sha1)?.expect("sig present"); let expected_signed_data: BString = fixture_data .lines_with_terminator() .enumerate() diff --git a/gix-object/tests/object/commit/mod.rs b/gix-object/tests/object/commit/mod.rs index 022123aa93d..382cbf6fc42 100644 --- a/gix-object/tests/object/commit/mod.rs +++ b/gix-object/tests/object/commit/mod.rs @@ -158,7 +158,7 @@ mod method { #[test] fn tree() -> crate::Result { let fixture = fixture_name("commit", "unsigned.txt"); - let commit = CommitRef::from_bytes(&fixture)?; + let commit = CommitRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; assert_eq!(commit.tree(), hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d")); 
assert_eq!(commit.tree, "1b2dfb4ac5e42080b682fc676e9738c94ce6d54d"); Ok(()) @@ -167,7 +167,7 @@ mod method { #[test] fn author_and_committer_trims_signature() -> crate::Result { let backing = fixture_name("commit", "email-with-space.txt"); - let commit = CommitRef::from_bytes(&backing)?; + let commit = CommitRef::from_bytes(&backing, gix_hash::Kind::Sha1)?; std::assert_eq!(commit.author()?, signature("1592437401 +0800")); std::assert_eq!(commit.committer()?, signature("1592437401 +0800")); Ok(()) @@ -178,16 +178,9 @@ mod method { fn invalid() { let fixture = fixture_name("commit", "unsigned.txt"); let partial_commit = &fixture[..fixture.len() / 2]; + assert!(CommitRef::from_bytes(partial_commit, gix_hash::Kind::Sha1).is_err()); assert_eq!( - CommitRef::from_bytes(partial_commit).unwrap_err().to_string(), - if cfg!(feature = "verbose-object-parsing-errors") { - "object parsing failed at ``\nexpected `author `" - } else { - "object parsing failed" - } - ); - assert_eq!( - CommitRefIter::from_bytes(partial_commit) + CommitRefIter::from_bytes(partial_commit, gix_hash::Kind::Sha1) .take_while(Result::is_ok) .count(), 1, @@ -199,8 +192,8 @@ fn invalid() { fn invalid_object_id_length() { let input = b"tree 00000066666666666684666666666666666299297\npare6"; - assert!(CommitRef::from_bytes(input).is_err()); - assert!(CommitRefIter::from_bytes(input) + assert!(CommitRef::from_bytes(input, gix_hash::Kind::Sha1).is_err()); + assert!(CommitRefIter::from_bytes(input, gix_hash::Kind::Sha1) .next() .expect("a decoding error is returned for the first token") .is_err()); @@ -210,8 +203,8 @@ fn invalid_object_id_length() { fn fuzz_artifact_inputs_can_be_parsed_without_panicking() { for path in crate::fuzz_artifact_paths("fuzz_commit") { let input = std::fs::read(path).expect("artifact is readable"); - _ = CommitRef::from_bytes(&input); - _ = CommitRefIter::from_bytes(&input).count(); + _ = CommitRef::from_bytes(&input, gix_hash::Kind::Sha1); + _ = CommitRefIter::from_bytes(&input, 
gix_hash::Kind::Sha1).count(); } } diff --git a/gix-object/tests/object/encode.rs b/gix-object/tests/object/encode.rs index 1fe1eeecd21..ee9b15c54f5 100644 --- a/gix-object/tests/object/encode.rs +++ b/gix-object/tests/object/encode.rs @@ -6,71 +6,6 @@ enum Error { TryFromError, } -macro_rules! round_trip { - ($owned:ty, $borrowed:ty, $( $files:literal ), +) => { - #[test] - fn round_trip() -> Result<(), Box> { - use std::convert::TryFrom; - use std::io::Write; - use crate::fixture_bytes; - use gix_object::{ObjectRef, Object, WriteTo}; - use bstr::ByteSlice; - - for input_name in &[ - $( $files ),* - ] { - let input = fixture_bytes(input_name); - // Test the parse->borrowed->owned->write chain for an object kind - let mut output = Vec::new(); - let item = <$borrowed>::from_bytes(&input)?; - item.write_to(&mut output)?; - assert_eq!(output.as_bstr(), input.as_bstr(), "borrowed: {input_name}"); - - let item: $owned = item.try_into()?; - output.clear(); - item.write_to(&mut output)?; - assert_eq!(output.as_bstr(), input.as_bstr()); - - // Test the parse->borrowed->owned->write chain for the top-level objects - let item = ObjectRef::from(<$borrowed>::from_bytes(&input)?); - output.clear(); - item.write_to(&mut output)?; - assert_eq!(output.as_bstr(), input.as_bstr(), "object-ref"); - - let item: Object = Object::try_from(item)?; - output.clear(); - item.write_to(&mut output)?; - assert_eq!(output.as_bstr(), input.as_bstr(), "owned"); - - // Test the loose serialisation -> parse chain for an object kind - let item = <$borrowed>::from_bytes(&input)?; - // serialise a borowed item to a tagged loose object - output.clear(); - { - let w = &mut output; - w.write_all(&item.loose_header())?; - item.write_to(w)?; - let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; - let item2 = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; - assert_eq!(item2, item, "object-ref loose: {input_name} {:?}\n{:?}", 
output.as_bstr(), input.as_bstr()); - } - - let item: $owned = item.try_into()?; - // serialise an owned to a tagged loose object - output.clear(); - let w = &mut output; - w.write_all(&item.loose_header())?; - item.write_to(w)?; - let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; - let parsed_borrowed = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; - let item2: $owned = parsed_borrowed.try_into().or(Err(super::Error::TryFromError))?; - assert_eq!(item2, item, "object-ref loose owned: {input_name} {:?}\n{:?}", output.as_bstr(), input.as_bstr()); - } - Ok(()) - } - }; -} - /// Needed for roundtripping object types that take a `hash_kind` parameter. /// This is the same as `round_trip`, but for types that have `from_bytes()` with `hash_kind`. macro_rules! round_trip_with_hash_kind { @@ -79,7 +14,7 @@ macro_rules! round_trip_with_hash_kind { fn round_trip() -> Result<(), Box> { use std::convert::TryFrom; use std::io::Write; - use crate::fixture_bytes; + use crate::object_fixture; use gix_object::{ObjectRef, Object, WriteTo}; use bstr::ByteSlice; let hash_kind = crate::fixture_hash_kind(); @@ -87,11 +22,7 @@ macro_rules! round_trip_with_hash_kind { for input_name in &[ $( $files ),* ] { - let input = if let Some(path) = input_name.strip_prefix("tree/") { - crate::tree_fixture(path)? - } else { - fixture_bytes(input_name) - }; + let input = object_fixture(input_name)?; // Test the parse->borrowed->owned->write chain for an object kind let mut output = Vec::new(); let item = <$borrowed>::from_bytes(&input, hash_kind)?; @@ -144,7 +75,7 @@ macro_rules! 
round_trip_with_hash_kind { } mod tag { - round_trip!( + round_trip_with_hash_kind!( gix_object::Tag, gix_object::TagRef, "tag/empty_missing_nl.txt", @@ -157,7 +88,7 @@ mod tag { } mod commit { - round_trip!( + round_trip_with_hash_kind!( gix_object::Commit, gix_object::CommitRef, "commit/email-with-space.txt", @@ -216,8 +147,74 @@ mod tree { } mod blob { - // It doesn't matter which data we use - it's not interpreted. - round_trip!(gix_object::Blob, gix_object::BlobRef, "tree/everything.tree"); + use std::{convert::TryFrom, io::Write}; + + use bstr::ByteSlice; + use gix_object::{Blob, BlobRef, Object, ObjectRef, WriteTo}; + + use crate::fixture_bytes; + + #[test] + fn round_trip() -> Result<(), Box> { + let input_name = "tree/everything.tree"; + let input = fixture_bytes(input_name); + // It doesn't matter which data we use - it's not interpreted. + + let mut output = Vec::new(); + let item = BlobRef::from_bytes(&input)?; + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "borrowed: {input_name}"); + + let item: Blob = item.into(); + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr()); + + let item = ObjectRef::from(BlobRef::from_bytes(&input)?); + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "object-ref"); + + let item: Object = Object::try_from(item)?; + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "owned"); + + let item = BlobRef::from_bytes(&input)?; + output.clear(); + { + let w = &mut output; + w.write_all(&item.loose_header())?; + item.write_to(w)?; + let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; + let item2 = BlobRef::try_from(parsed).or(Err(super::Error::TryFromError))?; + assert_eq!( + item2, + item, + "object-ref loose: {input_name} {:?}\n{:?}", + output.as_bstr(), + input.as_bstr() + ); + } + + let item: Blob = item.into(); + 
output.clear(); + let w = &mut output; + w.write_all(&item.loose_header())?; + item.write_to(w)?; + let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; + let parsed_borrowed = BlobRef::try_from(parsed).or(Err(super::Error::TryFromError))?; + let item2: Blob = parsed_borrowed.into(); + assert_eq!( + item2, + item, + "object-ref loose owned: {input_name} {:?}\n{:?}", + output.as_bstr(), + input.as_bstr() + ); + + Ok(()) + } } mod loose_header { diff --git a/gix-object/tests/object/main.rs b/gix-object/tests/object/main.rs index fdcc90fd71e..113569e942b 100644 --- a/gix-object/tests/object/main.rs +++ b/gix-object/tests/object/main.rs @@ -99,12 +99,16 @@ fn fixture_name(kind: &str, path: &str) -> Vec { /// Return the object id expected in fixture assertions for the active fixture hash kind. /// -/// Tree fixtures in this test module are authored as SHA-1 data and are rewritten on demand for -/// SHA-256 runs. This helper mirrors that rewrite on the expectation side so tree parsing tests can -/// use one set of source ids for both hash kinds. +/// Object fixtures in this test module are authored as SHA-1 data and are rewritten on demand for +/// SHA-256 runs. This helper mirrors that rewrite on the expectation side so parsing tests can use +/// one set of source ids for both hash kinds. pub fn fixture_oid(hex: &str) -> ObjectId { let oid = hex_to_id(hex); - match fixture_hash_kind() { + translate_fixture_oid(oid, fixture_hash_kind()) +} + +fn translate_fixture_oid(oid: ObjectId, kind: gix_hash::Kind) -> ObjectId { + match kind { gix_hash::Kind::Sha1 => oid, kind => { let mut hasher = gix_hash::hasher(kind); @@ -114,6 +118,19 @@ pub fn fixture_oid(hex: &str) -> ObjectId { } } +/// Load an object fixture and, if needed, rewrite its SHA-1 object ids for the active fixture hash kind. 
+pub fn object_fixture(path: &str) -> Result> { + if let Some(path) = path.strip_prefix("tree/") { + tree_fixture(path) + } else if let Some(path) = path.strip_prefix("commit/") { + commit_fixture(path) + } else if let Some(path) = path.strip_prefix("tag/") { + tag_fixture(path) + } else { + Ok(fixture_bytes(path)) + } +} + /// Load a tree fixture and, if needed, rewrite its embedded entry ids for the active fixture hash kind. /// /// The on-disk `tree/*.tree` fixtures contain SHA-1-sized ids. For SHA-256 test runs we parse the @@ -126,9 +143,7 @@ pub fn tree_fixture(path: &str) -> Result> { kind => { let mut tree: gix_object::Tree = gix_object::TreeRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?.into(); for entry in &mut tree.entries { - let mut hasher = gix_hash::hasher(kind); - hasher.update(entry.oid.as_bytes()); - entry.oid = hasher.try_finalize()?; + entry.oid = translate_fixture_oid(entry.oid, kind); } let mut out = Vec::with_capacity( fixture.len() + tree.entries.len() * (kind.len_in_bytes() - gix_hash::Kind::Sha1.len_in_bytes()), @@ -139,6 +154,42 @@ pub fn tree_fixture(path: &str) -> Result> { } } +fn commit_fixture(path: &str) -> Result> { + let fixture = fixture_name("commit", path); + match fixture_hash_kind() { + gix_hash::Kind::Sha1 => Ok(fixture), + kind => { + let mut commit = gix_object::CommitRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?.into_owned()?; + commit.tree = translate_fixture_oid(commit.tree, kind); + for parent in &mut commit.parents { + *parent = translate_fixture_oid(*parent, kind); + } + + let mut out = Vec::with_capacity( + fixture.len() + + (1 + commit.parents.len()) * (kind.len_in_bytes() - gix_hash::Kind::Sha1.len_in_bytes()), + ); + commit.write_to(&mut out)?; + Ok(out) + } + } +} + +fn tag_fixture(path: &str) -> Result> { + let fixture = fixture_name("tag", path); + match fixture_hash_kind() { + gix_hash::Kind::Sha1 => Ok(fixture), + kind => { + let mut tag = gix_object::TagRef::from_bytes(&fixture, 
gix_hash::Kind::Sha1)?.into_owned()?; + tag.target = translate_fixture_oid(tag.target, kind); + + let mut out = Vec::with_capacity(fixture.len() + kind.len_in_bytes() - gix_hash::Kind::Sha1.len_in_bytes()); + tag.write_to(&mut out)?; + Ok(out) + } + } +} + pub fn generated_tree_root_id() -> Result { let root = gix_testtools::scripted_fixture_read_only("make_trees.sh")?; let tree = std::fs::read(root.join("tree.baseline"))?; diff --git a/gix-object/tests/object/tag.rs b/gix-object/tests/object/tag.rs index 33b48441a90..60df56d4c02 100644 --- a/gix-object/tests/object/tag.rs +++ b/gix-object/tests/object/tag.rs @@ -2,6 +2,107 @@ use gix_object::{bstr::ByteSlice, Kind, TagRef, TagRefIter}; use crate::fixture_name; +const PGP_BEGIN_NOT_AT_LINE_START: &[u8] = b"object ffa700b4aca13b80cb6b98a078e7c96804f8e0ec +type commit +tag pgp-marker-in-message + +message text +not-a-signature -----BEGIN PGP SIGNATURE----- +body +-----END PGP SIGNATURE-----"; +const PGP_BEGIN_NOT_AT_LINE_START_MESSAGE: &[u8] = b"message text +not-a-signature -----BEGIN PGP SIGNATURE----- +body +-----END PGP SIGNATURE-----"; +const PGP_SIGNATURE_WITH_TRAILING_TEXT: &[u8] = b"object ffa700b4aca13b80cb6b98a078e7c96804f8e0ec +type commit +tag pgp-signature-with-trailing-text + +message text +-----BEGIN PGP SIGNATURE----- +body +-----END PGP SIGNATURE----- +trailing text"; +const PGP_SIGNATURE_WITH_TRAILING_TEXT_SIGNATURE: &[u8] = b"-----BEGIN PGP SIGNATURE----- +body +-----END PGP SIGNATURE----- +trailing text"; +const PGP_SIGNATURE_WITHOUT_END_MARKER: &[u8] = b"object ffa700b4aca13b80cb6b98a078e7c96804f8e0ec +type commit +tag pgp-signature-without-end-marker + +message text +-----BEGIN PGP SIGNATURE----- +body"; +const PGP_SIGNATURE_WITHOUT_END_MARKER_SIGNATURE: &[u8] = b"-----BEGIN PGP SIGNATURE----- +body"; +const PGP_SIGNATURE_AT_BODY_START: &[u8] = b"object ffa700b4aca13b80cb6b98a078e7c96804f8e0ec +type commit +tag pgp-signature-at-body-start + +-----BEGIN PGP SIGNATURE----- +body"; +const 
PGP_SIGNATURE_AT_BODY_START_SIGNATURE: &[u8] = b"-----BEGIN PGP SIGNATURE----- +body"; + +#[test] +fn sha256_with_all_fields_and_signature() -> crate::Result { + let input = b"object abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789 +type commit +tag v2.0.0-sha256 +tagger Release Bot 1710007200 +0530 + +Release v2.0.0 + +- ship sha256 object support +- include annotated tag signatures +-----BEGIN PGP SIGNATURE----- +sha256-tag-signature +-----END PGP SIGNATURE----- +"; + let tag = TagRef::from_bytes(input, gix_hash::Kind::Sha256)?; + assert_eq!( + tag.target, + b"abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789".as_bstr() + ); + assert_eq!(tag.target().kind(), gix_hash::Kind::Sha256); + assert_eq!(tag.target_kind, Kind::Commit); + assert_eq!(tag.name, b"v2.0.0-sha256".as_bstr()); + assert_eq!(tag.tagger()?.expect("tagger").name, b"Release Bot".as_bstr()); + assert_eq!( + tag.message, + b"Release v2.0.0 + +- ship sha256 object support +- include annotated tag signatures" + .as_bstr() + ); + assert_eq!( + tag.pgp_signature, + Some( + b"-----BEGIN PGP SIGNATURE----- +sha256-tag-signature +-----END PGP SIGNATURE----- +" + .as_bstr() + ) + ); + + let tokens = TagRefIter::from_bytes(input, gix_hash::Kind::Sha256).collect::, _>>()?; + assert!(matches!( + tokens.first(), + Some(gix_object::tag::ref_iter::Token::Target { id }) if id.kind() == gix_hash::Kind::Sha256 + )); + assert!(matches!( + tokens.last(), + Some(gix_object::tag::ref_iter::Token::Body { + pgp_signature: Some(_), + .. 
+ }) + )); + Ok(()) +} + mod method { use bstr::ByteSlice; use gix_object::TagRef; @@ -12,7 +113,7 @@ mod method { #[test] fn target() -> crate::Result { let fixture = fixture_name("tag", "signed.txt"); - let tag_ref = TagRef::from_bytes(&fixture)?; + let tag_ref = TagRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; assert_eq!(tag_ref.target(), hex_to_id("ffa700b4aca13b80cb6b98a078e7c96804f8e0ec")); assert_eq!(tag_ref.target, "ffa700b4aca13b80cb6b98a078e7c96804f8e0ec".as_bytes()); @@ -37,7 +138,7 @@ mod method { #[test] fn tagger_trims_signature() -> crate::Result { let fixture = fixture_name("tag", "tagger-with-whitespace.txt"); - let tag = TagRef::from_bytes(&fixture)?; + let tag = TagRef::from_bytes(&fixture, gix_hash::Kind::Sha1)?; std::assert_eq!(tag.tagger()?, Some(signature("1592381636 +0800"))); Ok(()) } @@ -51,7 +152,7 @@ mod iter { #[test] fn empty() -> crate::Result { let tag = fixture_name("tag", "empty.txt"); - let tag_iter = TagRefIter::from_bytes(&tag); + let tag_iter = TagRefIter::from_bytes(&tag, gix_hash::Kind::Sha1); let target_id = hex_to_id("01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc"); let tagger = Some(signature("1592381636 +0800")); assert_eq!( @@ -75,7 +176,8 @@ mod iter { #[test] fn no_tagger() -> crate::Result { assert_eq!( - TagRefIter::from_bytes(&fixture_name("tag", "no-tagger.txt")).collect::, _>>()?, + TagRefIter::from_bytes(&fixture_name("tag", "no-tagger.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Target { id: hex_to_id("c39ae07f393806ccf406ef966e9a15afc43cc36a") @@ -111,7 +213,8 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn whitespace() -> crate::Result { assert_eq!( - TagRefIter::from_bytes(&fixture_name("tag", "whitespace.txt")).collect::, _>>()?, + TagRefIter::from_bytes(&fixture_name("tag", "whitespace.txt"), gix_hash::Kind::Sha1) + .collect::, _>>()?, vec![ Token::Target { id: hex_to_id("01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc") @@ -128,10 +231,31 @@ KLMHist5yj0sw1E4hDTyQa0= Ok(()) } + #[test] + fn 
pgp_begin_marker_not_at_line_start_is_message() -> crate::Result { + assert_eq!( + TagRefIter::from_bytes(super::PGP_BEGIN_NOT_AT_LINE_START, gix_hash::Kind::Sha1) + .collect::, _>>()?, + vec![ + Token::Target { + id: hex_to_id("ffa700b4aca13b80cb6b98a078e7c96804f8e0ec") + }, + Token::TargetKind(Kind::Commit), + Token::Name(b"pgp-marker-in-message".as_bstr()), + Token::Tagger(None), + Token::Body { + message: super::PGP_BEGIN_NOT_AT_LINE_START_MESSAGE.as_bstr(), + pgp_signature: None + } + ] + ); + Ok(()) + } + #[test] fn error_handling() -> crate::Result { let data = fixture_name("tag", "empty.txt"); - let iter = TagRefIter::from_bytes(&data[..data.len() / 3]); + let iter = TagRefIter::from_bytes(&data[..data.len() / 3], gix_hash::Kind::Sha1); let tokens = iter.collect::>(); assert!( tokens.last().expect("at least the errored token").is_err(), @@ -145,27 +269,38 @@ KLMHist5yj0sw1E4hDTyQa0= fn invalid() { let fixture = fixture_name("tag", "whitespace.txt"); let partial_tag = &fixture[..fixture.len() / 2]; + assert!(TagRef::from_bytes(partial_tag, gix_hash::Kind::Sha1).is_err()); assert_eq!( - TagRef::from_bytes(partial_tag).unwrap_err().to_string(), - if cfg!(feature = "verbose-object-parsing-errors") { - "object parsing failed at `Sebasti`\ninvalid Closing '>' not found\nexpected ` <> <+|->`, `tagger `" - } else { - "object parsing failed" - } - ); - assert_eq!( - TagRefIter::from_bytes(partial_tag).take_while(Result::is_ok).count(), + TagRefIter::from_bytes(partial_tag, gix_hash::Kind::Sha1) + .take_while(Result::is_ok) + .count(), 3, "we can decode some fields before failing" ); } +#[test] +fn uppercase_target_id() -> crate::Result { + let input = b"object FFA700B4ACA13B80CB6B98A078E7C96804F8E0EC +type commit +tag uppercase-target + +message"; + let tag = TagRef::from_bytes(input, gix_hash::Kind::Sha1)?; + assert_eq!(tag.target, b"FFA700B4ACA13B80CB6B98A078E7C96804F8E0EC".as_bstr()); + assert_eq!( + tag.target(), + 
crate::hex_to_id("ffa700b4aca13b80cb6b98a078e7c96804f8e0ec") + ); + Ok(()) +} + #[test] fn invalid_target_id_length() { let input = b"object 00000066666666666684666666666666666299297\ntype commit\ntag bad\n"; - assert!(TagRef::from_bytes(input).is_err()); - assert!(TagRefIter::from_bytes(input) + assert!(TagRef::from_bytes(input, gix_hash::Kind::Sha1).is_err()); + assert!(TagRefIter::from_bytes(input, gix_hash::Kind::Sha1) .next() .expect("a decoding error is returned for the first token") .is_err()); @@ -174,22 +309,54 @@ fn invalid_target_id_length() { mod from_bytes { use gix_object::{bstr::ByteSlice, Kind, TagRef, WriteTo}; - use crate::{fixture_name, tag::tag_fixture}; + use crate::{fixture_name, fixture_oid, object_fixture}; #[test] fn signed() -> crate::Result { - assert_eq!(TagRef::from_bytes(&fixture_name("tag", "signed.txt"))?, tag_fixture()); + let fixture = tag_fixture("signed.txt")?; + let target = fixture_oid_hex("ffa700b4aca13b80cb6b98a078e7c96804f8e0ec"); + assert_eq!( + TagRef::from_bytes(&fixture, crate::fixture_hash_kind())?, + TagRef { + target: target.as_bytes().as_bstr(), + name: b"1.0.0".as_bstr(), + target_kind: Kind::Commit, + message: b"for the signature".as_bstr(), + pgp_signature: Some( + b"-----BEGIN PGP SIGNATURE----- +Comment: GPGTools - https://gpgtools.org + +iQIzBAABCgAdFiEEw7xSvXbiwjusbsBqZl+Z+p2ZlmwFAlsapyYACgkQZl+Z+p2Z +lmy6Ug/+KzvzqiNpzz1bMVVAzp8NCbiEO3QGYPyeQc521lBwpaTrRYR+oHJY15r3 +OdL5WDysTpjN8N5FNyfmvzkuPdTkK3JlYmO7VRjdA2xu/B6vIZLaOfAowFrhMvKo +8eoqwGcAP3rC5TuWEgzq2qhbjS4JXFLd4NLjWEFqT2Y2UKm+g8TeGOsa/0pF4Nq5 +xeW4qCYR0WcQLFedbpkKHxag2GfaXKvzNNJdqYhVQssNa6BeSmsfDvlWYNe617wV +NvsR/zJT0wHb5SSH+h6QmwA7LQIQF//83Vc3aF7kv9D54r3ibXW5TjZ3WoeTUZO7 +kefkzJ12EYDCFLPhHvXPog518nO8Ot46dX+okrF0/B4N3RFTvjKr7VAGTzv2D/Dg +DrD531S2F71b+JIRh641eeP7bjWFQi3tWLtrEOtjjsKPJfYRMKpYFnAO4UUJ6Rck +Z5fFXEUCO8d5WT56jzKDjmVoY01lA87O1YsP/J+zQAlc9v1k6jqeQ53LZNgTN+ue +5fJuSPT3T43pSOD1VQSr3aZ2Anc4Qu7K8uX9lkpxF9Sc0tDbeCosFLZMWNVp6m+e 
+cjHJZXWmV4CcRfmLsXzU8s2cR9A0DBvOxhPD1TlKC2JhBFXigjuL9U4Rbq9tdegB +2n8f2douw6624Tn/6Lm4a7AoxmU+CMiYagDxDL3RuZ8CAfh3bn0= +=aIns +-----END PGP SIGNATURE-----" + .as_bstr(), + ), + tagger: Some(b"Sebastian Thiel 1528473343 +0230".as_bstr()), + } + ); Ok(()) } #[test] fn empty() -> crate::Result { - let fixture = fixture_name("tag", "empty.txt"); - let tag_ref = TagRef::from_bytes(&fixture)?; + let fixture = tag_fixture("empty.txt")?; + let target = fixture_oid_hex("01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc"); + let tag_ref = TagRef::from_bytes(&fixture, crate::fixture_hash_kind())?; assert_eq!( tag_ref, TagRef { - target: b"01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc".as_bstr(), + target: target.as_bytes().as_bstr(), name: b"empty".as_bstr(), target_kind: Kind::Commit, message: b"\n".as_bstr(), @@ -197,18 +364,19 @@ mod from_bytes { pgp_signature: None } ); - assert_eq!(tag_ref.size(), 140); + assert_eq!(tag_ref.size(), fixture.len() as u64); Ok(()) } #[test] fn empty_missing_nl() -> crate::Result { - let fixture = fixture_name("tag", "empty_missing_nl.txt"); - let tag_ref = TagRef::from_bytes(&fixture)?; + let fixture = tag_fixture("empty_missing_nl.txt")?; + let target = fixture_oid_hex("01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc"); + let tag_ref = TagRef::from_bytes(&fixture, crate::fixture_hash_kind())?; assert_eq!( tag_ref, TagRef { - target: b"01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc".as_bstr(), + target: target.as_bytes().as_bstr(), name: b"empty".as_bstr(), target_kind: Kind::Commit, message: b"".as_bstr(), @@ -216,16 +384,17 @@ mod from_bytes { pgp_signature: None } ); - assert_eq!(tag_ref.size(), 139); + assert_eq!(tag_ref.size(), fixture.len() as u64); Ok(()) } #[test] fn with_newlines() -> crate::Result { + let target = fixture_oid_hex("ebdf205038b66108c0331aa590388431427493b7"); assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "with-newlines.txt"))?, + TagRef::from_bytes(&tag_fixture("with-newlines.txt")?, crate::fixture_hash_kind())?, TagRef { - target: 
b"ebdf205038b66108c0331aa590388431427493b7".as_bstr(), + target: target.as_bytes().as_bstr(), name: b"baz".as_bstr(), target_kind: Kind::Commit, message: b"hello\n\nworld".as_bstr(), @@ -238,10 +407,11 @@ mod from_bytes { #[test] fn no_tagger() -> crate::Result { + let target = fixture_oid_hex("c39ae07f393806ccf406ef966e9a15afc43cc36a"); assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "no-tagger.txt"))?, + TagRef::from_bytes(&tag_fixture("no-tagger.txt")?, crate::fixture_hash_kind())?, TagRef { - target: b"c39ae07f393806ccf406ef966e9a15afc43cc36a".as_bstr(), + target: target.as_bytes().as_bstr(), name: b"v2.6.11-tree".as_bstr(), target_kind: Kind::Tree, message: b"This is the 2.6.11 tree object. @@ -268,12 +438,58 @@ KLMHist5yj0sw1E4hDTyQa0= Ok(()) } + #[test] + fn pgp_begin_marker_not_at_line_start_is_message() -> crate::Result { + let tag = TagRef::from_bytes(super::PGP_BEGIN_NOT_AT_LINE_START, gix_hash::Kind::Sha1)?; + assert_eq!(tag.message, super::PGP_BEGIN_NOT_AT_LINE_START_MESSAGE.as_bstr()); + assert_eq!(tag.pgp_signature, None, "it doesn't parse this as PGP signature"); + assert_roundtrip(super::PGP_BEGIN_NOT_AT_LINE_START)?; + Ok(()) + } + + #[test] + fn trailing_text_after_pgp_end_marker_is_signature() -> crate::Result { + let tag = TagRef::from_bytes(super::PGP_SIGNATURE_WITH_TRAILING_TEXT, gix_hash::Kind::Sha1)?; + assert_eq!(tag.message, b"message text".as_bstr()); + assert_eq!( + tag.pgp_signature, + Some(super::PGP_SIGNATURE_WITH_TRAILING_TEXT_SIGNATURE.as_bstr()) + ); + assert_roundtrip(super::PGP_SIGNATURE_WITH_TRAILING_TEXT)?; + Ok(()) + } + + #[test] + fn pgp_begin_marker_without_end_marker_starts_signature() -> crate::Result { + let tag = TagRef::from_bytes(super::PGP_SIGNATURE_WITHOUT_END_MARKER, gix_hash::Kind::Sha1)?; + assert_eq!(tag.message, b"message text".as_bstr()); + assert_eq!( + tag.pgp_signature, + Some(super::PGP_SIGNATURE_WITHOUT_END_MARKER_SIGNATURE.as_bstr()) + ); + 
assert_roundtrip(super::PGP_SIGNATURE_WITHOUT_END_MARKER)?; + Ok(()) + } + + #[test] + fn pgp_begin_marker_at_body_start_is_signature() -> crate::Result { + let tag = TagRef::from_bytes(super::PGP_SIGNATURE_AT_BODY_START, gix_hash::Kind::Sha1)?; + assert_eq!(tag.message, b"".as_bstr()); + assert_eq!( + tag.pgp_signature, + Some(super::PGP_SIGNATURE_AT_BODY_START_SIGNATURE.as_bstr()) + ); + assert_roundtrip(super::PGP_SIGNATURE_AT_BODY_START)?; + Ok(()) + } + #[test] fn whitespace() -> crate::Result { + let target = fixture_oid_hex("01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc"); assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "whitespace.txt"))?, + TagRef::from_bytes(&tag_fixture("whitespace.txt")?, crate::fixture_hash_kind())?, TagRef { - target: b"01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc".as_bstr(), + target: target.as_bytes().as_bstr(), name: b"whitespace".as_bstr(), target_kind: Kind::Commit, message: b" \ttab\nnewline\n\nlast-with-trailer\n".as_bstr(), @@ -287,7 +503,10 @@ KLMHist5yj0sw1E4hDTyQa0= #[test] fn tagger_without_timestamp() -> crate::Result { assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "tagger-without-timestamp.txt"))?, + TagRef::from_bytes( + &fixture_name("tag", "tagger-without-timestamp.txt"), + gix_hash::Kind::Sha1 + )?, TagRef { target: b"4fcd840c4935e4c7a5ea3552710a0f26b9178c24".as_bstr(), name: b"ChangeLog".as_bstr(), @@ -299,34 +518,20 @@ KLMHist5yj0sw1E4hDTyQa0= ); Ok(()) } -} -fn tag_fixture() -> TagRef<'static> { - TagRef { - target: b"ffa700b4aca13b80cb6b98a078e7c96804f8e0ec".as_bstr(), - name: b"1.0.0".as_bstr(), - target_kind: Kind::Commit, - message: b"for the signature".as_bstr(), - pgp_signature: Some( - b"-----BEGIN PGP SIGNATURE----- -Comment: GPGTools - https://gpgtools.org + fn assert_roundtrip(input: &[u8]) -> crate::Result { + let tag = TagRef::from_bytes(input, gix_hash::Kind::Sha1)?; + let mut out = Vec::new(); + tag.write_to(&mut out)?; + assert_eq!(out, input); + Ok(()) + } 
-iQIzBAABCgAdFiEEw7xSvXbiwjusbsBqZl+Z+p2ZlmwFAlsapyYACgkQZl+Z+p2Z -lmy6Ug/+KzvzqiNpzz1bMVVAzp8NCbiEO3QGYPyeQc521lBwpaTrRYR+oHJY15r3 -OdL5WDysTpjN8N5FNyfmvzkuPdTkK3JlYmO7VRjdA2xu/B6vIZLaOfAowFrhMvKo -8eoqwGcAP3rC5TuWEgzq2qhbjS4JXFLd4NLjWEFqT2Y2UKm+g8TeGOsa/0pF4Nq5 -xeW4qCYR0WcQLFedbpkKHxag2GfaXKvzNNJdqYhVQssNa6BeSmsfDvlWYNe617wV -NvsR/zJT0wHb5SSH+h6QmwA7LQIQF//83Vc3aF7kv9D54r3ibXW5TjZ3WoeTUZO7 -kefkzJ12EYDCFLPhHvXPog518nO8Ot46dX+okrF0/B4N3RFTvjKr7VAGTzv2D/Dg -DrD531S2F71b+JIRh641eeP7bjWFQi3tWLtrEOtjjsKPJfYRMKpYFnAO4UUJ6Rck -Z5fFXEUCO8d5WT56jzKDjmVoY01lA87O1YsP/J+zQAlc9v1k6jqeQ53LZNgTN+ue -5fJuSPT3T43pSOD1VQSr3aZ2Anc4Qu7K8uX9lkpxF9Sc0tDbeCosFLZMWNVp6m+e -cjHJZXWmV4CcRfmLsXzU8s2cR9A0DBvOxhPD1TlKC2JhBFXigjuL9U4Rbq9tdegB -2n8f2douw6624Tn/6Lm4a7AoxmU+CMiYagDxDL3RuZ8CAfh3bn0= -=aIns ------END PGP SIGNATURE-----" - .as_bstr(), - ), - tagger: Some(b"Sebastian Thiel 1528473343 +0230".as_bstr()), + fn tag_fixture(path: &str) -> crate::Result> { + object_fixture(&format!("tag/{path}")) + } + + fn fixture_oid_hex(hex: &str) -> String { + fixture_oid(hex).to_hex().to_string() } } diff --git a/gix-object/tests/object/tree/from_bytes.rs b/gix-object/tests/object/tree/from_bytes.rs index a360e4523da..917b3bc55e5 100644 --- a/gix-object/tests/object/tree/from_bytes.rs +++ b/gix-object/tests/object/tree/from_bytes.rs @@ -66,12 +66,7 @@ fn invalid() { let fixture = tree_fixture("definitely-special.tree").expect("fixture is valid"); let partial_tree = &fixture[..fixture.len() / 2]; let hash_kind = crate::fixture_hash_kind(); - let err = TreeRef::from_bytes(partial_tree, hash_kind).unwrap_err().to_string(); - if cfg!(feature = "verbose-object-parsing-errors") { - assert!(err.starts_with("object parsing failed at `"), "{err}"); - } else { - assert_eq!(err, "object parsing failed"); - } + assert!(TreeRef::from_bytes(partial_tree, hash_kind).is_err()); assert!( TreeRefIter::from_bytes(partial_tree, hash_kind) .take_while(Result::is_ok) diff --git 
a/gix-pack/src/data/output/count/objects/mod.rs b/gix-pack/src/data/output/count/objects/mod.rs index 05f06f58663..1f0997f25cd 100644 --- a/gix-pack/src/data/output/count/objects/mod.rs +++ b/gix-pack/src/data/output/count/objects/mod.rs @@ -175,7 +175,7 @@ mod expand { match obj.kind { Tree | Blob => break, Tag => { - id = TagRefIter::from_bytes(obj.data) + id = TagRefIter::from_bytes(obj.data, obj.hash_kind) .target_id() .expect("every tag has a target"); let tmp = db.find(&id, buf1)?; @@ -188,7 +188,7 @@ mod expand { } Commit => { let current_tree_iter = { - let mut commit_iter = CommitRefIter::from_bytes(obj.data); + let mut commit_iter = CommitRefIter::from_bytes(obj.data, obj.hash_kind); let tree_id = commit_iter.tree_id().expect("every commit has a tree"); parent_commit_ids.clear(); for token in commit_iter { @@ -227,9 +227,12 @@ mod expand { push_obj_count_unique( &mut out, seen_objs, commit_id, location, objects, stats, true, ); - CommitRefIter::from_bytes(parent_commit_obj.data) - .tree_id() - .expect("every commit has a tree") + CommitRefIter::from_bytes( + parent_commit_obj.data, + parent_commit_obj.hash_kind, + ) + .tree_id() + .expect("every commit has a tree") }; let parent_tree = { let (parent_tree_obj, location) = db.find(&parent_tree_id, buf2)?; @@ -296,7 +299,7 @@ mod expand { break; } Commit => { - id = CommitRefIter::from_bytes(obj.0.data) + id = CommitRefIter::from_bytes(obj.0.data, obj.0.hash_kind) .tree_id() .expect("every commit has a tree"); stats.expanded_objects += 1; @@ -305,7 +308,7 @@ mod expand { } Blob => break, Tag => { - id = TagRefIter::from_bytes(obj.0.data) + id = TagRefIter::from_bytes(obj.0.data, obj.0.hash_kind) .target_id() .expect("every tag has a target"); stats.expanded_objects += 1; diff --git a/gix-path/tests/path/realpath.rs b/gix-path/tests/path/realpath.rs index 22a13845362..44d6dd11483 100644 --- a/gix-path/tests/path/realpath.rs +++ b/gix-path/tests/path/realpath.rs @@ -19,7 +19,7 @@ fn fuzzed_timeout() -> 
crate::Result { } )); assert!( - start.elapsed() < Duration::from_millis(if cfg!(windows) { 1000 } else { 500 }), + start.elapsed() < Duration::from_millis(if cfg!(windows) { 2000 } else { 500 }), "took too long: {:.02} , we can't take too much time for this, and should keep the amount of work reasonable\ as paths can be part of URls which sometimes are canonicalized", start.elapsed().as_secs_f32() diff --git a/gix-protocol/Cargo.toml b/gix-protocol/Cargo.toml index 9bf39420a94..a3c2e6c7814 100644 --- a/gix-protocol/Cargo.toml +++ b/gix-protocol/Cargo.toml @@ -97,7 +97,6 @@ bstr = { version = "1.12.0", default-features = false, features = [ "std", "unicode", ] } -winnow = { version = "1.0.0", features = ["simd"] } # for async-client async-trait = { version = "0.1.51", optional = true } diff --git a/gix-protocol/src/remote_progress.rs b/gix-protocol/src/remote_progress.rs index ab27f7d2799..26c6cc5d2c1 100644 --- a/gix-protocol/src/remote_progress.rs +++ b/gix-protocol/src/remote_progress.rs @@ -1,9 +1,4 @@ use bstr::ByteSlice; -use winnow::{ - combinator::{opt, preceded, terminated}, - prelude::*, - token::take_till, -}; /// The information usually found in remote progress messages as sent by a git server during /// fetch, clone and push operations. @@ -71,32 +66,89 @@ impl RemoteProgress<'_> { } } -fn parse_number(i: &mut &[u8]) -> ModalResult { - take_till(0.., |c: u8| !c.is_ascii_digit()) - .try_map(gix_utils::btoi::to_signed) - .parse_next(i) +/// Parse a non-empty prefix of ASCII decimal digits as an unsigned number. +/// +/// On success, `i` is advanced past the parsed digits and the parsed value is +/// returned. If there are no digits at the current position, `None` is +/// returned. If the digit prefix cannot be represented as `usize`, `i` is +/// advanced anyway to avoid retrying the same input and `None` is returned. 
+fn parse_number(i: &mut &[u8]) -> Option { + let len = i.iter().take_while(|b| b.is_ascii_digit()).count(); + if len == 0 { + return None; + } + let (number, rest) = i.split_at(len); + *i = rest; + gix_utils::btoi::to_signed(number).ok() } -fn next_optional_percentage(i: &mut &[u8]) -> ModalResult, ()> { - opt(terminated( - preceded( - take_till(0.., |c: u8| c.is_ascii_digit()), - parse_number.try_map(u32::try_from), - ), - b"%", - )) - .parse_next(i) +/// Advance `i` to the first ASCII digit in the remaining input. +/// +/// If no digit is present, `i` is advanced to the end of the input. +/// If `i` already starts with a digit, it is left unchanged. +fn skip_until_digit_or_to_end(i: &mut &[u8]) { + let pos = i.iter().position(u8::is_ascii_digit).unwrap_or(i.len()); + *i = &i[pos..]; } -fn next_optional_number(i: &mut &[u8]) -> ModalResult, ()> { - opt(preceded(take_till(0.., |c: u8| c.is_ascii_digit()), parse_number)).parse_next(i) +/// Find and parse the next ASCII decimal number only if it is followed by `%`. +/// +/// For example, `b" 42% (21/50)"` yields `Some(42)` and advances `i` to +/// `b" (21/50)"`, while `b" (21/50)"` yields `None` because the next number is +/// not a percentage. `b" done"` yields `None` with `i` fully consumed, as there +/// are no digits left to parse. +/// +/// If the digit prefix cannot be represented as `u32`, it is treated as +/// absent and `None` is returned with `i` advanced past all consumed bytes. 
+fn next_optional_percentage(i: &mut &[u8]) -> Option { + let before = *i; + skip_until_digit_or_to_end(i); + let number = parse_number(i)?; + if let Some(rest) = i.strip_prefix(b"%") { + *i = rest; + u32::try_from(number).ok() + } else { + *i = before; + None + } } -fn parse_progress<'i>(line: &mut &'i [u8]) -> ModalResult, ()> { - let action = take_till(1.., |c| c == b':').parse_next(line)?; - let percent = next_optional_percentage.parse_next(line)?; - let step = next_optional_number.parse_next(line)?; - let max = next_optional_number.parse_next(line)?; +/// Find and parse the next ASCII decimal number, if one is present. +/// +/// For example, `b" (21/50)"` yields `Some(21)` and advances `i` to `b"/50)"`. +/// Calling it again on that remainder yields `Some(50)` and advances `i` to +/// `b")"`. If no digit is present, it yields `None` and advances `i` to the +/// empty suffix. +/// +/// If the next digit prefix cannot be represented as `usize`, it is treated as +/// absent and `None` is returned. In that case, `i` is advanced past the digit +/// prefix because [`parse_number`] consumes it before conversion. +fn next_optional_number(i: &mut &[u8]) -> Option { + skip_until_digit_or_to_end(i); + parse_number(i) +} + +/// Parse a remote progress line with a non-empty action followed by `:`. +/// +/// The remainder is scanned leniently for the common progress fields emitted by +/// git servers: an optional percentage, then up to two optional numbers for the +/// current step and maximum. For example, inputs like +/// `b"Receiving objects: 42% (21/50)"` and `b"Resolving deltas: 21/50"` can +/// produce an action plus `percent`, `step`, and `max` values. +/// +/// `line` is advanced as the fields are found. If parsing succeeds, it points at +/// the unconsumed suffix after the parsed progress fields. Inputs without a +/// colon, or with an empty action before the colon, return an error. 
+fn parse_progress<'i>(line: &mut &'i [u8]) -> Result, ()> { + let action_end = line.iter().position(|b| *b == b':').ok_or(())?; + if action_end == 0 { + return Err(()); + } + let action = &line[..action_end]; + *line = &line[action_end..]; + let percent = next_optional_percentage(line); + let step = next_optional_number(line); + let max = next_optional_number(line); Ok(RemoteProgress { action: action.into(), percent, diff --git a/gix-ref/Cargo.toml b/gix-ref/Cargo.toml index 09a151e7695..fad4f25ff8c 100644 --- a/gix-ref/Cargo.toml +++ b/gix-ref/Cargo.toml @@ -19,6 +19,8 @@ test = true [features] ## Enable support for the SHA-1 hash by enabling the respective feature in the `gix-hash` crate. sha1 = ["gix-hash/sha1"] +## Enable support for the SHA-256 hash by enabling the respective feature in the `gix-hash` crate. +sha256 = ["gix-hash/sha256"] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. serde = ["dep:serde", "gix-hash/serde", "gix-actor/serde", "gix-object/serde"] @@ -35,7 +37,6 @@ gix-lock = { version = "^22.0.0", path = "../gix-lock" } gix-tempfile = { version = "^22.0.0", default-features = false, path = "../gix-tempfile" } thiserror = "2.0.18" -winnow = { version = "1.0.0", features = ["simd"] } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } # packed refs diff --git a/gix-ref/fuzz/Cargo.toml b/gix-ref/fuzz/Cargo.toml index a2105413c10..a1c1fb82f7f 100644 --- a/gix-ref/fuzz/Cargo.toml +++ b/gix-ref/fuzz/Cargo.toml @@ -11,6 +11,7 @@ cargo-fuzz = true anyhow = "1.0.75" arbitrary = { version = "1.3.2", features = ["derive"] } bstr = "1.8.0" +gix-hash = { path = "../../gix-hash", features = ["sha1"] } libfuzzer-sys = "0.4" [dependencies.gix-ref] diff --git a/gix-ref/fuzz/fuzz_targets/fuzz_packed_buffer.rs b/gix-ref/fuzz/fuzz_targets/fuzz_packed_buffer.rs index a60f340cb7d..241c8cc1628 100644 --- a/gix-ref/fuzz/fuzz_targets/fuzz_packed_buffer.rs +++ 
b/gix-ref/fuzz/fuzz_targets/fuzz_packed_buffer.rs @@ -14,7 +14,7 @@ struct Ctx<'a> { } fn fuzz(ctx: Ctx) -> Result<()> { - let buffer = Buffer::from_bytes(ctx.packed_file_contents)?; + let buffer = Buffer::from_bytes(ctx.packed_file_contents, gix_hash::Kind::Sha1)?; _ = black_box(buffer.iter()?.count()); let full_name_ref: &FullNameRef = BStr::new(ctx.name).try_into()?; diff --git a/gix-ref/src/parse.rs b/gix-ref/src/parse.rs index 0ac9060297c..5a754b18ba9 100644 --- a/gix-ref/src/parse.rs +++ b/gix-ref/src/parse.rs @@ -1,22 +1,47 @@ use gix_object::bstr::{BStr, ByteSlice}; -use winnow::{combinator::alt, error::ParserError, prelude::*, token::take_while}; -fn is_hex_digit_lc(b: u8) -> bool { - matches!(b, b'0'..=b'9' | b'a'..=b'f') +type ParseResult = Result; + +fn is_hex_digit(b: u8) -> bool { + b.is_ascii_hexdigit() +} + +/// Copy from `gix-object`, validating the hash against `hash_kind`. +pub fn hex_hash<'a>(i: &mut &'a [u8], hash_kind: gix_hash::Kind) -> ParseResult<&'a BStr> { + let len = hash_kind.len_in_hex(); + let Some(hex) = i.get(..len) else { + return Err(()); + }; + if !hex.iter().all(|b| is_hex_digit(*b)) { + return Err(()); + } + *i = &i[len..]; + Ok(hex.as_bstr()) } -/// Copy from https://github.com/GitoxideLabs/gitoxide/blob/64872690e60efdd9267d517f4d9971eecd3b875c/gix-object/src/parse.rs#L60-L67 -pub fn hex_hash<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<&'a BStr, E> { - // NOTE: It's important to be able to read all hashes, do not parameterize it. Hashes can be rejected at a later stage - // if needed. - take_while( - gix_hash::Kind::shortest().len_in_hex()..=gix_hash::Kind::longest().len_in_hex(), - is_hex_digit_lc, - ) - .map(ByteSlice::as_bstr) - .parse_next(i) +/// All supported hash lengths, if they match perfectly. 
+pub fn hex_hash_any<'a>(i: &mut &'a [u8]) -> ParseResult<&'a BStr> { + let max = gix_hash::Kind::longest().len_in_hex(); + let len = i.iter().take(max).take_while(|b| is_hex_digit(**b)).count(); + if !gix_hash::Kind::all().iter().any(|kind| kind.len_in_hex() == len) { + return Err(()); + } + let (hex, rest) = i.split_at(len); + *i = rest; + Ok(hex.as_bstr()) } -pub fn newline<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<&'a [u8], E> { - alt((b"\r\n", b"\n")).parse_next(i) +/// Parse CRLF or LF, independently of the platform. +pub fn newline<'a>(i: &mut &'a [u8]) -> ParseResult<&'a [u8]> { + if let Some(rest) = i.strip_prefix(b"\r\n") { + let out = &i[..2]; + *i = rest; + Ok(out) + } else if let Some(rest) = i.strip_prefix(b"\n") { + let out = &i[..1]; + *i = rest; + Ok(out) + } else { + Err(()) + } } diff --git a/gix-ref/src/store/file/find.rs b/gix-ref/src/store/file/find.rs index 6e49da77e4e..ae347df2042 100644 --- a/gix-ref/src/store/file/find.rs +++ b/gix-ref/src/store/file/find.rs @@ -194,7 +194,7 @@ impl file::Store { Ok(None) } Some(content) => Ok(Some( - loose::Reference::try_from_path(full_name.to_owned(), &content) + loose::Reference::try_from_path(full_name.to_owned(), &content, self.object_hash) .map(Into::into) .map(|mut r: Reference| { if let Some(namespace) = &self.namespace { diff --git a/gix-ref/src/store/file/log/line.rs b/gix-ref/src/store/file/log/line.rs index c0b7a435da7..3d660b53aa7 100644 --- a/gix-ref/src/store/file/log/line.rs +++ b/gix-ref/src/store/file/log/line.rs @@ -74,14 +74,8 @@ impl<'a> From> for Line { /// pub mod decode { use gix_object::bstr::{BStr, ByteSlice}; - use winnow::{ - combinator::{alt, eof, fail, opt, preceded, terminated}, - error::{AddContext, ParserError, StrContext}, - prelude::*, - token::{rest, take_while}, - }; - use crate::{file::log::LineRef, parse::hex_hash}; + use crate::{file::log::LineRef, parse::hex_hash_any}; /// mod error { @@ -116,80 +110,57 @@ pub mod decode { pub use error::Error; 
impl<'a> LineRef<'a> { - /// Decode a line from the given bytes which are expected to start at a hex sha. - pub fn from_bytes(mut input: &'a [u8]) -> Result, Error> { - one::<()>(&mut input).map_err(|_| Error::new(input)) + /// Decode a reflog line from the given bytes. + /// + /// Valid input starts with the previous object id, the new object id, a + /// signature, and an optional tab-separated message, for example: + /// + /// `0123456789012345678901234567890123456789 89abcdef89abcdef89abcdef89abcdef89abcdef Name 1700000000 +0000\tmessage` + pub fn from_bytes(input: &'a [u8]) -> Result, Error> { + decode(input).map_err(|_| Error::new(first_line(input))) } } - fn message<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<&'a BStr, E> { - if i.is_empty() { - rest.map(ByteSlice::as_bstr).parse_next(i) - } else { - terminated(take_while(0.., |c| c != b'\n'), opt(b'\n')) - .map(ByteSlice::as_bstr) - .parse_next(i) - } + /// Return the first line from `input`, without its trailing newline. + /// + /// If `input` contains no newline, all of `input` is returned. 
+ fn first_line(input: &[u8]) -> &[u8] { + let line_end = input.iter().position(|b| *b == b'\n').unwrap_or(input.len()); + &input[..line_end] } - fn one<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( - bytes: &mut &'a [u8], - ) -> ModalResult, E> { - let mut tokens = bytes.splitn(2, |b| *b == b'\t'); - if let (Some(mut first), Some(mut second)) = (tokens.next(), tokens.next()) { - let (old, new, signature) = ( - terminated(hex_hash, b" ").context(StrContext::Expected("".into())), - terminated(hex_hash, b" ").context(StrContext::Expected("".into())), - gix_actor::signature::decode.context(StrContext::Expected(" <> ".into())), - ) - .context(StrContext::Expected( - r" <> \t".into(), - )) - .parse_next(&mut first)?; - - // forward the buffer🤦‍♂️ - message.parse_next(bytes)?; - let message = message(&mut second)?; - Ok(LineRef { - previous_oid: old, - new_oid: new, - signature, - message, - }) - } else { - ( - ( - terminated(hex_hash, b" ").context(StrContext::Expected("".into())), - terminated(hex_hash, b" ").context(StrContext::Expected("".into())), - gix_actor::signature::decode.context(StrContext::Expected(" <> ".into())), - ) - .context(StrContext::Expected( - r" <> \t".into(), - )), - alt(( - preceded( - b'\t', - message.context(StrContext::Expected("".into())), - ), - b'\n'.value(Default::default()), - eof.value(Default::default()), - fail.context(StrContext::Expected( - "log message must be separated from signature with whitespace".into(), - )), - )), - ) - .map(|((old, new, signature), message)| LineRef { - previous_oid: old, - new_oid: new, - signature, - message, - }) - .parse_next(bytes) - } + /// Parse one reflog line from `bytes`. + /// + /// Only one line is parsed; any bytes after the first newline are + /// ignored. If the line has no tab separator, the message is empty. + /// + /// Return an error if the first line does not match the reflog line + /// format. 
+ fn decode(bytes: &[u8]) -> Result, ()> { + let line = first_line(bytes); + let (mut head, message) = match line.find_byte(b'\t') { + Some(tab) => (&line[..tab], line[tab + 1..].as_bstr()), + None => (line, BStr::new(b"")), + }; + + let old = hex_hash_any(&mut head)?; + head = head.strip_prefix(b" ").ok_or(())?; + let new = hex_hash_any(&mut head)?; + head = head.strip_prefix(b" ").ok_or(())?; + let signature = gix_actor::signature::decode(&mut head).map_err(|_| ())?; + if !head.is_empty() { + return Err(()); + } + Ok(LineRef { + previous_oid: old, + new_oid: new, + signature, + message, + }) } #[cfg(test)] - mod test { + mod test_decode { use super::*; /// Convert a hexadecimal hash into its corresponding `ObjectId` or _panic_. @@ -203,32 +174,18 @@ pub mod decode { } mod invalid { - use gix_testtools::to_bstr_err; - use winnow::{error::TreeError, prelude::*}; - - use super::one; + use super::decode; #[test] fn completely_bogus_shows_error_with_context() { - let err = one::> - .parse_peek(b"definitely not a log entry") - .map_err(to_bstr_err) - .expect_err("this should fail"); - assert!(err.to_string().contains(" ")); + let input = b"definitely not a log entry".as_slice(); + decode(input).expect_err("this should fail"); } #[test] fn missing_whitespace_between_signature_and_message() { let line = "0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 one 1234567890 -0000message"; - let err = one::> - .parse_peek(line.as_bytes()) - .map_err(to_bstr_err) - .expect_err("this should fail"); - assert!( - err.to_string() - .contains("log message must be separated from signature with whitespace"), - "expected\n `log message must be separated from signature with whitespace`\nin\n```\n{err}\n```" - ); + decode(line.as_bytes()).expect_err("this should fail"); } } @@ -240,10 +197,7 @@ pub mod decode { let line_with_nl = with_newline(line_without_nl.clone()); for input in &[line_without_nl, line_with_nl] { assert_eq!( - one::> - .parse_peek(input) 
- .expect("successful parsing") - .1, + decode(input.as_slice()).expect("successful parsing"), LineRef { previous_oid: NULL_SHA1.as_bstr(), new_oid: NULL_SHA1.as_bstr(), @@ -264,10 +218,7 @@ pub mod decode { let line_with_nl = with_newline(line_without_nl.clone()); for input in &[line_without_nl, line_with_nl] { - let (remaining, res) = one::> - .parse_peek(input) - .expect("successful parsing"); - assert!(remaining.is_empty(), "all consuming even without trailing newline"); + let res = decode(input.as_slice()).expect("successful parsing"); let actual = LineRef { previous_oid: b"a5828ae6b52137b913b978e16cd2334482eb4c1f".as_bstr(), new_oid: b"89b43f80a514aee58b662ad606e6352e03eaeee4".as_bstr(), @@ -290,20 +241,8 @@ pub mod decode { #[test] fn two_lines_in_a_row_with_and_without_newline() { let lines = b"0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 one 1234567890 -0000\t\n0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 two 1234567890 -0000\thello"; - let (remainder, parsed) = one::> - .parse_peek(lines) - .expect("parse single line"); + let parsed = decode(lines.as_slice()).expect("parse single line"); assert_eq!(parsed.message, b"".as_bstr(), "first message is empty"); - - let (remainder, parsed) = one::> - .parse_peek(remainder) - .expect("parse single line"); - assert_eq!( - parsed.message, - b"hello".as_bstr(), - "second message is not and contains no newline" - ); - assert!(remainder.is_empty()); } } } diff --git a/gix-ref/src/store/file/loose/reference/decode.rs b/gix-ref/src/store/file/loose/reference/decode.rs index c18e0afb204..c47b10beb71 100644 --- a/gix-ref/src/store/file/loose/reference/decode.rs +++ b/gix-ref/src/store/file/loose/reference/decode.rs @@ -1,16 +1,7 @@ use gix_hash::ObjectId; use gix_object::bstr::BString; -use winnow::{ - combinator::{opt, terminated}, - prelude::*, - token::take_while, -}; -use crate::{ - parse::{hex_hash, newline}, - 
store_impl::file::loose::Reference, - FullName, Target, -}; +use crate::{parse::hex_hash, store_impl::file::loose::Reference, FullName, Target}; enum MaybeUnsafeState { Id(ObjectId), @@ -52,12 +43,12 @@ impl TryFrom for Target { } impl Reference { - /// Create a new reference of the given `parent` store with `relative_path` service as unique identifier - /// at which the `path_contents` was read to obtain the refs value. - pub fn try_from_path(name: FullName, mut path_contents: &[u8]) -> Result { + /// Create a new reference named `name` from the loose reference file contents in `path_contents`, + /// parsing object ids as `hash_kind`. + pub fn try_from_path(name: FullName, path_contents: &[u8], hash_kind: gix_hash::Kind) -> Result { Ok(Reference { name, - target: parse(&mut path_contents) + target: parse(path_contents, hash_kind) .map_err(|_| Error::Parse { content: path_contents.into(), })? @@ -66,14 +57,31 @@ impl Reference { } } -fn parse(i: &mut &[u8]) -> ModalResult { - if let Some(_ref_prefix) = opt(terminated("ref: ", take_while(0.., b' '))).parse_next(i)? { - terminated(take_while(0.., |b| b != b'\r' && b != b'\n'), opt(newline)) - .map(|path| MaybeUnsafeState::UnvalidatedPath(path.into())) - .parse_next(i) +/// Parse the contents of a loose reference file. +/// +/// A *symbolic* reference starts with `ref: `, may have additional spaces before +/// the path, and returns [`MaybeUnsafeState::UnvalidatedPath`] with the path +/// bytes up to the next line ending or the end of input. The path is validated +/// later when it is converted into a [`Target`]. +/// +/// A *direct* reference starts with a hexadecimal object id and returns +/// [`MaybeUnsafeState::Id`]. +/// +/// If neither reference form can be parsed, an error is returned. 
+fn parse(mut i: &[u8], hash_kind: gix_hash::Kind) -> Result { + if let Some(rest) = i.strip_prefix(b"ref: ") { + i = rest; + while i.first() == Some(&b' ') { + i = &i[1..]; + } + let path_end = i.iter().position(|b| *b == b'\r' || *b == b'\n').unwrap_or(i.len()); + let path = i[..path_end].into(); + Ok(MaybeUnsafeState::UnvalidatedPath(path)) } else { - terminated(hex_hash, opt(newline)) - .map(|hex| MaybeUnsafeState::Id(ObjectId::from_hex(hex).expect("prior validation"))) - .parse_next(i) + let hex = hex_hash(&mut i, hash_kind)?; + if i.first().is_some_and(u8::is_ascii_hexdigit) { + return Err(()); + } + Ok(MaybeUnsafeState::Id(ObjectId::from_hex(hex).expect("prior validation"))) } } diff --git a/gix-ref/src/store/file/overlay_iter.rs b/gix-ref/src/store/file/overlay_iter.rs index 7a2368cebab..e7886ce46cc 100644 --- a/gix-ref/src/store/file/overlay_iter.rs +++ b/gix-ref/src/store/file/overlay_iter.rs @@ -21,6 +21,7 @@ use crate::{ pub struct LooseThenPacked<'p, 's> { git_dir: &'s Path, common_dir: Option<&'s Path>, + hash_kind: gix_hash::Kind, namespace: Option<&'s Namespace>, iter_packed: Option>>, iter_git_dir: Peekable, @@ -97,7 +98,7 @@ impl<'p> LooseThenPacked<'p, '_> { source: err, path: refpath.to_owned(), })?; - loose::Reference::try_from_path(name, buf) + loose::Reference::try_from_path(name, buf, self.hash_kind) .map_err(|err| { let relative_path = refpath .strip_prefix(git_dir) @@ -432,6 +433,7 @@ impl file::Store { Ok(LooseThenPacked { git_dir: self.git_dir(), common_dir: self.common_dir(), + hash_kind: self.object_hash, iter_packed: match packed { Some(packed) => Some( match git_dir_info.prefix() { diff --git a/gix-ref/src/store/file/packed.rs b/gix-ref/src/store/file/packed.rs index cfbe62922d4..30d7f015fc0 100644 --- a/gix-ref/src/store/file/packed.rs +++ b/gix-ref/src/store/file/packed.rs @@ -26,7 +26,11 @@ impl file::Store { /// Note that it will automatically be memory mapped if it exceeds the default threshold of 32KB. 
/// Change the threshold with [file::Store::set_packed_buffer_mmap_threshold()]. pub fn open_packed_buffer(&self) -> Result, packed::buffer::open::Error> { - match packed::Buffer::open(self.packed_refs_path(), self.packed_buffer_mmap_threshold) { + match packed::Buffer::open( + self.packed_refs_path(), + self.packed_buffer_mmap_threshold, + self.object_hash, + ) { Ok(buf) => Ok(Some(buf)), Err(packed::buffer::open::Error::Io(err)) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), Err(err) => Err(err), diff --git a/gix-ref/src/store/file/raw_ext.rs b/gix-ref/src/store/file/raw_ext.rs index 760cceb9ecb..320690c146e 100644 --- a/gix-ref/src/store/file/raw_ext.rs +++ b/gix-ref/src/store/file/raw_ext.rs @@ -160,24 +160,21 @@ impl ReferenceExt for Reference { let mut oid = self.follow_to_object_packed(store, packed)?; let mut buf = Vec::new(); let peeled_id = loop { - let gix_object::Data { - kind, - data, - hash_kind: _, - } = objects - .try_find(&oid, &mut buf)? - .ok_or_else(|| peel::to_id::Error::NotFound { - oid, - name: self.name.0.clone(), - })?; + let gix_object::Data { kind, data, hash_kind } = + objects + .try_find(&oid, &mut buf)? 
+ .ok_or_else(|| peel::to_id::Error::NotFound { + oid, + name: self.name.0.clone(), + })?; match kind { gix_object::Kind::Tag => { - oid = gix_object::TagRefIter::from_bytes(data).target_id().map_err(|_err| { - peel::to_id::Error::NotFound { + oid = gix_object::TagRefIter::from_bytes(data, hash_kind) + .target_id() + .map_err(|_err| peel::to_id::Error::NotFound { oid, name: self.name.0.clone(), - } - })?; + })?; } _ => break oid, } diff --git a/gix-ref/src/store/file/transaction/prepare.rs b/gix-ref/src/store/file/transaction/prepare.rs index 8b3e7cf9f23..5052bdd8047 100644 --- a/gix-ref/src/store/file/transaction/prepare.rs +++ b/gix-ref/src/store/file/transaction/prepare.rs @@ -34,7 +34,7 @@ impl Transaction<'_, '_> { .and_then(|maybe_loose| { maybe_loose .map(|buf| { - loose::Reference::try_from_path(change.update.name.clone(), &buf) + loose::Reference::try_from_path(change.update.name.clone(), &buf, store.object_hash) .map(Reference::from) .map_err(Error::from) }) diff --git a/gix-ref/src/store/packed/buffer.rs b/gix-ref/src/store/packed/buffer.rs index 2283cf9873b..b1433d1d82d 100644 --- a/gix-ref/src/store/packed/buffer.rs +++ b/gix-ref/src/store/packed/buffer.rs @@ -19,21 +19,21 @@ impl AsRef<[u8]> for packed::Backing { pub mod open { use std::path::PathBuf; - use winnow::{prelude::*, stream::Offset}; - use crate::store_impl::packed; /// Initialization impl packed::Buffer { - fn open_with_backing(backing: packed::Backing, path: PathBuf) -> Result { + fn open_with_backing( + backing: packed::Backing, + path: PathBuf, + hash_kind: gix_hash::Kind, + ) -> Result { let (backing, offset) = { let (offset, sorted) = { let mut input = backing.as_ref(); if *input.first().unwrap_or(&b' ') == b'#' { - let header = packed::decode::header::<()> - .parse_next(&mut input) - .map_err(|_| Error::HeaderParsing)?; - let offset = input.offset_from(&backing.as_ref()); + let header = packed::decode::header(&mut input).map_err(|_| Error::HeaderParsing)?; + let offset = 
backing.as_ref().len() - input.len(); (offset, header.sorted) } else { (0, false) @@ -42,7 +42,8 @@ pub mod open { if !sorted { // this implementation is likely slower than what git does, but it's less code, too. - let mut entries = packed::Iter::new(&backing.as_ref()[offset..])?.collect::, _>>()?; + let mut entries = + packed::Iter::new(&backing.as_ref()[offset..], hash_kind)?.collect::, _>>()?; entries.sort_by_key(|e| e.name.as_bstr()); let mut serialized = Vec::::new(); for entry in entries { @@ -65,14 +66,20 @@ pub mod open { offset, data: backing, path, + hash_kind, }) } - /// Open the file at `path` and map it into memory if the file size is larger than `use_memory_map_if_larger_than_bytes`. + /// Open the file at `path`, parsing object ids as `hash_kind`, and map it into memory if the file size is larger + /// than `use_memory_map_if_larger_than_bytes`. /// /// In order to allow fast lookups and optimizations, the contents of the packed refs must be sorted. /// If that's not the case, they will be sorted on the fly with the data being written into a memory buffer. - pub fn open(path: PathBuf, use_memory_map_if_larger_than_bytes: u64) -> Result { + pub fn open( + path: PathBuf, + use_memory_map_if_larger_than_bytes: u64, + hash_kind: gix_hash::Kind, + ) -> Result { let backing = if std::fs::metadata(&path)?.len() <= use_memory_map_if_larger_than_bytes { packed::Backing::InMemory(std::fs::read(&path)?) } else { @@ -84,16 +91,17 @@ pub mod open { }, ) }; - Self::open_with_backing(backing, path) + Self::open_with_backing(backing, path, hash_kind) } - /// Open a buffer from `bytes`, which is the content of a typical `packed-refs` file. + /// Open a buffer from `bytes`, which is the content of a typical `packed-refs` file, parsing object ids as + /// `hash_kind`. /// /// In order to allow fast lookups and optimizations, the contents of the packed refs must be sorted. /// If that's not the case, they will be sorted on the fly. 
- pub fn from_bytes(bytes: &[u8]) -> Result { + pub fn from_bytes(bytes: &[u8], hash_kind: gix_hash::Kind) -> Result { let backing = packed::Backing::InMemory(bytes.into()); - Self::open_with_backing(backing, PathBuf::from("")) + Self::open_with_backing(backing, PathBuf::from(""), hash_kind) } } diff --git a/gix-ref/src/store/packed/decode.rs b/gix-ref/src/store/packed/decode.rs index 09da3ce86d4..c53c670b122 100644 --- a/gix-ref/src/store/packed/decode.rs +++ b/gix-ref/src/store/packed/decode.rs @@ -1,15 +1,6 @@ use gix_object::bstr::{BStr, ByteSlice}; -use winnow::{ - combinator::{delimited, opt, preceded, terminated}, - error::{FromExternalError, ParserError}, - prelude::*, - token::take_while, -}; -use crate::{ - parse::{hex_hash, newline}, - store_impl::packed, -}; +use crate::{parse, store_impl::packed}; #[derive(Debug, PartialEq, Eq)] enum Peeled { @@ -34,47 +25,75 @@ impl Default for Header { } } -fn until_newline<'a, E>(input: &mut &'a [u8]) -> ModalResult<&'a BStr, E> -where - E: ParserError<&'a [u8]>, -{ - terminated(take_while(0.., |b: u8| b != b'\r' && b != b'\n'), newline) - .map(ByteSlice::as_bstr) - .parse_next(input) +/// Return the bytes before the next line ending as a [`BStr`]. +/// +/// On success, `input` is advanced past the line ending. The returned slice +/// does not include the line ending. 
+fn until_line_end_without_separator<'a>(input: &mut &'a [u8]) -> Result<&'a BStr, ()> { + let line_end = input.iter().position(|b| *b == b'\r' || *b == b'\n').ok_or(())?; + let out = input[..line_end].as_bstr(); + let mut maybe_start_of_newline = &input[line_end..]; + parse::newline(&mut maybe_start_of_newline)?; + *input = maybe_start_of_newline; + Ok(out) } -pub fn header<'a, E>(input: &mut &'a [u8]) -> ModalResult -where - E: ParserError<&'a [u8]>, -{ - preceded(b"# pack-refs with: ", until_newline) - .map(|traits| { - let mut peeled = Peeled::Unspecified; - let mut sorted = false; - for token in traits.as_bstr().split_str(b" ") { - if token == b"fully-peeled" { - peeled = Peeled::Fully; - } else if token == b"peeled" { - peeled = Peeled::Partial; - } else if token == b"sorted" { - sorted = true; - } - } - Header { peeled, sorted } - }) - .parse_next(input) +/// Parse a `packed-refs` header line. +/// +/// A valid header starts with `# pack-refs with: ` and ends with a line ending. +/// Known space-separated traits after the prefix populate the returned +/// [`Header`]: `peeled`, `fully-peeled`, and `sorted`. Unknown traits are +/// ignored. +/// +/// On success, `input` is advanced past the entire header line, including its +/// line ending. 
+pub fn header(input: &mut &[u8]) -> Result { + let Some(rest) = input.strip_prefix(b"# pack-refs with: ") else { + return Err(()); + }; + *input = rest; + let traits = until_line_end_without_separator(input)?; + let mut peeled = Peeled::Unspecified; + let mut sorted = false; + for token in traits.split_str(b" ") { + if token == b"fully-peeled" { + peeled = Peeled::Fully; + } else if token == b"peeled" { + peeled = Peeled::Partial; + } else if token == b"sorted" { + sorted = true; + } + } + Ok(Header { peeled, sorted }) } -pub fn reference<'a, E: ParserError<&'a [u8]> + FromExternalError<&'a [u8], crate::name::Error>>( - input: &mut &'a [u8], -) -> ModalResult, E> { - ( - terminated(hex_hash, b" "), - until_newline.try_map(TryInto::try_into), - opt(delimited(b"^", hex_hash, newline)), - ) - .map(|(target, name, object)| packed::Reference { name, target, object }) - .parse_next(input) +/// Parse one packed reference entry and its optional peeled object line. +/// +/// The reference line has the form ` ` followed by a +/// line ending. If the following line starts with `^`, it is parsed as the +/// peeled object id for the returned [`packed::Reference`]. +/// Object ids are parsed according to `hash_kind`. +/// +/// On success, `input` is advanced past the reference line and, if present, the +/// peeled object line. 
+pub fn reference<'a>(input: &mut &'a [u8], hash_kind: gix_hash::Kind) -> Result, ()> { + let target = parse::hex_hash(input, hash_kind)?; + let Some(rest) = input.strip_prefix(b" ") else { + return Err(()); + }; + *input = rest; + let name = until_line_end_without_separator(input)?.try_into().map_err(|_| ())?; + + let object = if let Some(rest) = input.strip_prefix(b"^") { + *input = rest; + let object = parse::hex_hash(input, hash_kind)?; + parse::newline(input)?; + Some(object) + } else { + None + }; + + Ok(packed::Reference { name, target, object }) } #[cfg(test)] diff --git a/gix-ref/src/store/packed/decode/tests.rs b/gix-ref/src/store/packed/decode/tests.rs index d3378469055..7b383c551f0 100644 --- a/gix-ref/src/store/packed/decode/tests.rs +++ b/gix-ref/src/store/packed/decode/tests.rs @@ -1,14 +1,14 @@ type Result = std::result::Result<(), Box>; mod reference { - use winnow::{error::TreeError, prelude::*}; - use super::Result; use crate::{ store_impl::{packed, packed::decode}, FullNameRef, }; + const HASH_KIND: gix_hash::Kind = gix_hash::Kind::Sha1; + /// Convert a hexadecimal hash into its corresponding `ObjectId` or _panic_. 
fn hex_to_id(hex: &str) -> gix_hash::ObjectId { gix_hash::ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex") @@ -16,22 +16,52 @@ mod reference { #[test] fn invalid() { - assert!(decode::reference::<()> - .parse_peek(b"# what looks like a comment") - .is_err()); + let mut input = b"# what looks like a comment".as_slice(); + assert!(decode::reference(&mut input, HASH_KIND).is_err()); + let mut input = b"^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\n".as_slice(); + assert!(decode::reference(&mut input, HASH_KIND).is_err(), "lonely peel"); + let mut input = + b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa refs/heads/main\n".as_slice(); + assert!( + decode::reference(&mut input, gix_hash::Kind::Sha1).is_err(), + "sha1 refs reject sha256-sized ids" + ); + let mut input = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa refs/heads/main\n".as_slice(); assert!( - decode::reference::<()> - .parse_peek(b"^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\n") - .is_err(), - "lonely peel" + decode::reference(&mut input, gix_hash::Kind::Sha256).is_err(), + "sha256 refs reject sha1-sized ids" ); } + #[test] + fn uppercase_hex() -> Result { + let mut input: &[u8] = b"D53C4B0F91F1B29769C9430F2D1C0BCAB1170C75 refs/heads/uppercase +^E9CDC958E7CE2290E2D7958CDB5AA9323EF35D37\n"; + let parsed = decode::reference(&mut input, HASH_KIND).unwrap(); + + assert!(input.is_empty(), "exhausted"); + assert_eq!(parsed.name, FullNameRef::new_unchecked("refs/heads/uppercase".into())); + assert_eq!(parsed.target(), hex_to_id("d53c4b0f91f1b29769c9430f2d1c0bcab1170c75")); + assert_eq!(parsed.object(), hex_to_id("e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37")); + Ok(()) + } + + #[test] + fn sha256_hex() -> Result { + let mut input: &[u8] = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa refs/heads/main\n"; + let parsed = decode::reference(&mut input, gix_hash::Kind::Sha256).unwrap(); + + assert!(input.is_empty(), "exhausted"); + assert_eq!(parsed.name, 
FullNameRef::new_unchecked("refs/heads/main".into())); + assert_eq!(parsed.target().kind(), gix_hash::Kind::Sha256); + Ok(()) + } + #[test] fn two_refs_in_a_row() -> Result { - let input: &[u8] = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/alternates-after-packs-and-loose + let mut input: &[u8] = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/alternates-after-packs-and-loose ^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\neaae9c1bc723209d793eb93f5587fa2604d5cd92 refs/heads/avoid-double-lookup\n"; - let (input, parsed) = decode::reference::>.parse_peek(input).unwrap(); + let parsed = decode::reference(&mut input, HASH_KIND).unwrap(); assert_eq!( parsed, @@ -44,7 +74,7 @@ mod reference { assert_eq!(parsed.target(), hex_to_id("d53c4b0f91f1b29769c9430f2d1c0bcab1170c75")); assert_eq!(parsed.object(), hex_to_id("e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37")); - let (input, parsed) = decode::reference::>.parse_peek(input).unwrap(); + let parsed = decode::reference(&mut input, HASH_KIND).unwrap(); assert!(input.is_empty(), "exhausted"); assert_eq!( parsed.name, @@ -58,8 +88,6 @@ mod reference { mod header { use gix_object::bstr::ByteSlice; - use gix_testtools::to_bstr_err; - use winnow::prelude::*; use super::Result; use crate::store_impl::packed::{ @@ -69,28 +97,21 @@ mod header { #[test] fn invalid() { - assert!( - decode::header::<()>.parse_peek(b"# some user comment").is_err(), - "something the user put there" - ); - assert!( - decode::header::<()>.parse_peek(b"# pack-refs: ").is_err(), - "looks right but isn't" - ); - assert!( - decode::header::<()>.parse_peek(b" # pack-refs with: ").is_err(), - "does not start with #" - ); + let mut input = b"# some user comment".as_slice(); + assert!(decode::header(&mut input).is_err(), "something the user put there"); + assert_eq!(input[0], b'#', "it consumed nothing"); + let mut input = b"# pack-refs: ".as_slice(); + assert!(decode::header(&mut input).is_err(), "looks right but isn't"); + let mut input = b" # pack-refs 
with: ".as_slice(); + assert!(decode::header(&mut input).is_err(), "does not start with #"); } #[test] fn valid_fully_peeled_stored() -> Result { - let input: &[u8] = b"# pack-refs with: peeled fully-peeled sorted \nsomething else"; - let (rest, header) = decode::header::> - .parse_peek(input) - .map_err(to_bstr_err)?; + let mut input: &[u8] = b"# pack-refs with: peeled fully-peeled sorted \nsomething else"; + let header = decode::header(&mut input).expect("valid input"); - assert_eq!(rest.as_bstr(), "something else", "remainder starts after newline"); + assert_eq!(input.as_bstr(), "something else", "remainder starts after newline"); assert_eq!( header, Header { @@ -103,10 +124,10 @@ mod header { #[test] fn valid_peeled_unsorted() -> Result { - let input: &[u8] = b"# pack-refs with: peeled\n"; - let (rest, header) = decode::header::<()>.parse_peek(input).unwrap(); + let mut input: &[u8] = b"# pack-refs with: peeled\n"; + let header = decode::header(&mut input).unwrap(); - assert!(rest.is_empty()); + assert!(input.is_empty()); assert_eq!( header, Header { @@ -119,10 +140,10 @@ mod header { #[test] fn valid_empty() -> Result { - let input: &[u8] = b"# pack-refs with: \n"; - let (rest, header) = decode::header::<()>.parse_peek(input).unwrap(); + let mut input: &[u8] = b"# pack-refs with: \n"; + let header = decode::header(&mut input).unwrap(); - assert!(rest.is_empty()); + assert!(input.is_empty()); assert_eq!( header, Header { diff --git a/gix-ref/src/store/packed/find.rs b/gix-ref/src/store/packed/find.rs index 28c41a7eb8d..015a1aa1625 100644 --- a/gix-ref/src/store/packed/find.rs +++ b/gix-ref/src/store/packed/find.rs @@ -1,5 +1,4 @@ use gix_object::bstr::{BStr, BString, ByteSlice}; -use winnow::prelude::*; use crate::{store_impl::packed, FullNameRef, PartialNameRef}; @@ -42,9 +41,7 @@ impl packed::Buffer { Ok(line_start) => { let mut input = &self.as_ref()[line_start..]; Ok(Some( - packed::decode::reference::<()> - .parse_next(&mut input) - .map_err(|_| 
Error::Parse)?, + packed::decode::reference(&mut input, self.hash_kind).map_err(|_| Error::Parse)?, )) } Err((parse_failure, _)) => { @@ -93,8 +90,7 @@ impl packed::Buffer { a.binary_search_by_key(&full_name.as_ref(), |b: &u8| { let ofs = std::ptr::from_ref::(b) as usize - a.as_ptr() as usize; let mut line = &a[search_start_of_record(ofs)..]; - packed::decode::reference::<()> - .parse_next(&mut line) + packed::decode::reference(&mut line, self.hash_kind) .map(|r| r.name.as_bstr().as_bytes()) .inspect_err(|_err| { encountered_parse_failure = true; diff --git a/gix-ref/src/store/packed/iter.rs b/gix-ref/src/store/packed/iter.rs index 19c8b995e59..d72c24cefc5 100644 --- a/gix-ref/src/store/packed/iter.rs +++ b/gix-ref/src/store/packed/iter.rs @@ -1,5 +1,4 @@ use gix_object::bstr::{BString, ByteSlice}; -use winnow::{combinator::preceded, prelude::*, token::rest}; use crate::store_impl::{packed, packed::decode}; @@ -11,13 +10,13 @@ impl packed::Buffer { /// /// There is no namespace support in packed iterators. It can be emulated using `iter_prefixed(…)`. pub fn iter(&self) -> Result, packed::iter::Error> { - packed::Iter::new(self.as_ref()) + packed::Iter::new(self.as_ref(), self.hash_kind) } /// Return an iterator yielding only references matching the given prefix, ordered by reference name. 
pub fn iter_prefixed(&self, prefix: BString) -> Result, packed::iter::Error> { let first_record_with_prefix = self.binary_search_by(prefix.as_bstr()).unwrap_or_else(|(_, pos)| pos); - packed::Iter::new_with_prefix(&self.as_ref()[first_record_with_prefix..], Some(prefix)) + packed::Iter::new_with_prefix(&self.as_ref()[first_record_with_prefix..], self.hash_kind, Some(prefix)) } } @@ -29,8 +28,8 @@ impl<'a> Iterator for packed::Iter<'a> { return None; } - let start = self.cursor.checkpoint(); - match decode::reference::<()>.parse_next(&mut self.cursor) { + let start = self.cursor; + match decode::reference(&mut self.cursor, self.hash_kind) { Ok(reference) => { self.current_line += 1; if let Some(ref prefix) = self.prefix { @@ -42,7 +41,7 @@ impl<'a> Iterator for packed::Iter<'a> { Some(Ok(reference)) } Err(_) => { - self.cursor.reset(&start); + self.cursor = start; let (failed_line, next_cursor) = self .cursor .find_byte(b'\n') @@ -64,39 +63,43 @@ impl<'a> Iterator for packed::Iter<'a> { } impl<'a> packed::Iter<'a> { - /// Return a new iterator after successfully parsing the possibly existing first line of the given `packed` refs buffer. - pub fn new(packed: &'a [u8]) -> Result { - Self::new_with_prefix(packed, None) + /// Return a new iterator after successfully parsing the possibly existing first line of the given `packed` refs buffer, + /// parsing object ids as `hash_kind`. + pub fn new(packed: &'a [u8], hash_kind: gix_hash::Kind) -> Result { + Self::new_with_prefix(packed, hash_kind, None) } - /// Returns an iterators whose references will only match the given prefix. + /// Returns an iterator whose references will only match `prefix`. /// - /// It assumes that the underlying `packed` buffer is indeed sorted + /// It assumes that the underlying `packed` buffer is indeed sorted and parses object ids as `hash_kind`. 
pub(in crate::store_impl::packed) fn new_with_prefix( packed: &'a [u8], + hash_kind: gix_hash::Kind, prefix: Option, ) -> Result { if packed.is_empty() { Ok(packed::Iter { cursor: packed, + hash_kind, prefix, current_line: 1, }) } else if packed[0] == b'#' { let mut input = packed; - let refs = preceded(decode::header::<()>, rest) - .parse_next(&mut input) - .map_err(|_| Error::Header { - invalid_first_line: packed.lines().next().unwrap_or(packed).into(), - })?; + decode::header(&mut input).map_err(|_| Error::Header { + invalid_first_line: packed.lines().next().unwrap_or(packed).into(), + })?; + let refs = input; Ok(packed::Iter { cursor: refs, + hash_kind, prefix, current_line: 2, }) } else { Ok(packed::Iter { cursor: packed, + hash_kind, prefix, current_line: 1, }) diff --git a/gix-ref/src/store/packed/mod.rs b/gix-ref/src/store/packed/mod.rs index f6f71ac7982..228fd093fa9 100644 --- a/gix-ref/src/store/packed/mod.rs +++ b/gix-ref/src/store/packed/mod.rs @@ -20,6 +20,8 @@ enum Backing { #[derive(Debug)] pub struct Buffer { data: Backing, + /// The hash kind to expect when parsing packed references. + hash_kind: gix_hash::Kind, /// The offset to the first record, how many bytes to skip past the header offset: usize, /// The path from which we were loaded @@ -75,6 +77,8 @@ impl Reference<'_> { pub struct Iter<'a> { /// The position at which to parse the next reference cursor: &'a [u8], + /// The hash kind to expect when parsing packed references. + hash_kind: gix_hash::Kind, /// The next line, starting at 1 current_line: usize, /// If set, references returned will match the prefix, the first failed match will stop all iteration. 
diff --git a/gix-ref/src/store/packed/transaction.rs b/gix-ref/src/store/packed/transaction.rs index e9990ce90f1..4c220780538 100644 --- a/gix-ref/src/store/packed/transaction.rs +++ b/gix-ref/src/store/packed/transaction.rs @@ -107,14 +107,20 @@ impl packed::Transaction { { let mut next_id = new; edit.peeled = loop { - let kind = objects.try_find(&next_id, &mut buf)?.map(|d| d.kind); - match kind { - Some(gix_object::Kind::Tag) => { - next_id = gix_object::TagRefIter::from_bytes(&buf).target_id().map_err(|_| { - prepare::Error::Resolve( - format!("Couldn't get target object id from tag {next_id}").into(), - ) - })?; + let data = objects.try_find(&next_id, &mut buf)?; + match data { + Some(gix_object::Data { + kind: gix_object::Kind::Tag, + data, + hash_kind, + }) => { + next_id = gix_object::TagRefIter::from_bytes(data, hash_kind) + .target_id() + .map_err(|_| { + prepare::Error::Resolve( + format!("Couldn't get target object id from tag {next_id}").into(), + ) + })?; } Some(_) => { break if next_id == new { None } else { Some(next_id) }; diff --git a/gix-ref/tests/Cargo.toml b/gix-ref/tests/Cargo.toml index 85f0beb520a..863af5572f6 100644 --- a/gix-ref/tests/Cargo.toml +++ b/gix-ref/tests/Cargo.toml @@ -28,7 +28,7 @@ gix-discover = { path = "../../gix-discover" } gix-odb = { path = "../../gix-odb" } gix-actor = { path = "../../gix-actor" } gix-date = { path = "../../gix-date" } -gix-hash = { path = "../../gix-hash" } +gix-hash = { path = "../../gix-hash", features = ["sha1", "sha256"] } gix-validate = { path = "../../gix-validate" } gix-lock = { path = "../../gix-lock" } gix-object = { path = "../../gix-object" } diff --git a/gix-ref/tests/fixtures/generated-archives/.gitignore b/gix-ref/tests/fixtures/generated-archives/.gitignore index 1350be16f54..baa08f41ebe 100644 --- a/gix-ref/tests/fixtures/generated-archives/.gitignore +++ b/gix-ref/tests/fixtures/generated-archives/.gitignore @@ -1,3 +1,5 @@ make_worktree_repo.tar +make_worktree_repo_sha256.tar 
make_worktree_repo_packed.tar +make_worktree_repo_packed_sha256.tar make_multi_hop_ref*.tar diff --git a/gix-ref/tests/fixtures/generated-archives/make_namespaced_packed_ref_repository_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_namespaced_packed_ref_repository_sha256.tar new file mode 100644 index 00000000000..6e64745b56c Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_namespaced_packed_ref_repository_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_for_overlay_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_for_overlay_sha256.tar new file mode 100644 index 00000000000..f4a0e891d81 Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_for_overlay_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_sha256.tar new file mode 100644 index 00000000000..ee37a55b17a Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_packed_refs_for_lookup_rules_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_packed_refs_for_lookup_rules_sha256.tar new file mode 100644 index 00000000000..3b4e923097e Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_packed_refs_for_lookup_rules_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_pristine.tar b/gix-ref/tests/fixtures/generated-archives/make_pristine.tar index 40e705b753d..d59c7e8e42f 100644 Binary files a/gix-ref/tests/fixtures/generated-archives/make_pristine.tar and b/gix-ref/tests/fixtures/generated-archives/make_pristine.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_pristine_sha256.tar 
b/gix-ref/tests/fixtures/generated-archives/make_pristine_sha256.tar new file mode 100644 index 00000000000..d24bc426a37 Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_pristine_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_pseudo_ref_repository.tar b/gix-ref/tests/fixtures/generated-archives/make_pseudo_ref_repository.tar index 68efa91bb9c..a03442143f1 100644 Binary files a/gix-ref/tests/fixtures/generated-archives/make_pseudo_ref_repository.tar and b/gix-ref/tests/fixtures/generated-archives/make_pseudo_ref_repository.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_pseudo_ref_repository_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_pseudo_ref_repository_sha256.tar new file mode 100644 index 00000000000..c133fcf4728 Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_pseudo_ref_repository_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_ref_repository.tar b/gix-ref/tests/fixtures/generated-archives/make_ref_repository.tar index 76d6c84863f..2cb9e18c739 100644 Binary files a/gix-ref/tests/fixtures/generated-archives/make_ref_repository.tar and b/gix-ref/tests/fixtures/generated-archives/make_ref_repository.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_ref_repository_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_ref_repository_sha256.tar new file mode 100644 index 00000000000..ff7c75e933b Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_ref_repository_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro.tar b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro.tar index 9e5178f7397..f26b1e7c1e1 100644 Binary files a/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro.tar and b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro.tar differ diff --git 
a/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro_sha256.tar new file mode 100644 index 00000000000..541f3c86f03 Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_repo_for_1928_repro.tar b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1928_repro.tar index 5b7540170fa..9af9034dbee 100644 Binary files a/gix-ref/tests/fixtures/generated-archives/make_repo_for_1928_repro.tar and b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1928_repro.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_repo_for_1928_repro_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1928_repro_sha256.tar new file mode 100644 index 00000000000..083f4631f06 Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1928_repro_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_repo_for_reflog_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_repo_for_reflog_sha256.tar new file mode 100644 index 00000000000..a6e0628ab83 Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_repo_for_reflog_sha256.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_repository_with_lots_of_packed_refs_sha256.tar b/gix-ref/tests/fixtures/generated-archives/make_repository_with_lots_of_packed_refs_sha256.tar new file mode 100644 index 00000000000..53d1d9ef25b Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_repository_with_lots_of_packed_refs_sha256.tar differ diff --git a/gix-ref/tests/fixtures/make_pristine.sh b/gix-ref/tests/fixtures/make_pristine.sh index ed42133f519..744c8c0face 100755 --- a/gix-ref/tests/fixtures/make_pristine.sh +++ b/gix-ref/tests/fixtures/make_pristine.sh @@ -10,7 +10,11 @@ git init 
changed-headref git init detached (cd detached - echo "abcdefabcdefabcdefabcdefabcdefabcdefabcd" >.git/HEAD + if test "$(git rev-parse --show-object-format)" = "sha256"; then + printf "%0.sa" $(seq 1 64) >.git/HEAD + else + echo "abcdefabcdefabcdefabcdefabcdefabcdefabcd" >.git/HEAD + fi ) git init invalid-loose-ref diff --git a/gix-ref/tests/fixtures/make_pseudo_ref_repository.sh b/gix-ref/tests/fixtures/make_pseudo_ref_repository.sh index ef4b3476dae..e981a8985f3 100755 --- a/gix-ref/tests/fixtures/make_pseudo_ref_repository.sh +++ b/gix-ref/tests/fixtures/make_pseudo_ref_repository.sh @@ -8,7 +8,8 @@ git rev-parse HEAD > .git/JIRI_HEAD touch .git/SOME_ALL_CAPS_FILE touch .git/refs/SHOULD_BE_EXCLUDED_HEAD +head_id=$(git rev-parse HEAD) cat <> .git/FETCH_HEAD -9064ea31fae4dc59a56bdd3a06c0ddc990ee689e branch 'main' of https://github.com/Byron/gitoxide -1b8d9e6a408e480ae1912e919c37a26e5c46639d not-for-merge branch 'faster-discovery' of https://github.com/Byron/gitoxide -EOF \ No newline at end of file +$head_id branch 'main' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'faster-discovery' of https://github.com/Byron/gitoxide +EOF diff --git a/gix-ref/tests/fixtures/make_ref_repository.sh b/gix-ref/tests/fixtures/make_ref_repository.sh index d89948193fa..c473b7ad6c3 100755 --- a/gix-ref/tests/fixtures/make_ref_repository.sh +++ b/gix-ref/tests/fixtures/make_ref_repository.sh @@ -28,17 +28,18 @@ git rev-parse HEAD > .git/refs/remotes/origin/multi-link-target3 echo "ref: refs/loop-b" > .git/refs/loop-a echo "ref: refs/loop-a" > .git/refs/loop-b +head_id=$(git rev-parse HEAD) cat <> .git/FETCH_HEAD -9064ea31fae4dc59a56bdd3a06c0ddc990ee689e branch 'main' of https://github.com/Byron/gitoxide -1b8d9e6a408e480ae1912e919c37a26e5c46639d not-for-merge branch 'faster-discovery' of https://github.com/Byron/gitoxide -43f695a9607f1f85f859f2ef944b785b5b6dd238 not-for-merge branch 'fix-823' of https://github.com/Byron/gitoxide 
-96267708958ead2646aae8766a50fa060739003c not-for-merge branch 'fix-bare-with-index' of https://github.com/Byron/gitoxide -1397e19375bb98522f951b8a452b08c1b35ffbac not-for-merge branch 'gix-archive' of https://github.com/Byron/gitoxide -db71ec8b7c7f2730c47dde3bb662ab56ae89ae7d not-for-merge branch 'index-from-files' of https://github.com/Byron/gitoxide -9f0c71917e57653d2e7121eae65d9385a188a8df not-for-merge branch 'moonwalk' of https://github.com/Byron/gitoxide -44d2b67de5639d4ea3d08ab030ecfe4bdfc8cbfb not-for-merge branch 'release-gix' of https://github.com/Byron/gitoxide -37c3d073b15dafcb52b2040e4b92a413c69a726d not-for-merge branch 'smart-release-without-git2' of https://github.com/Byron/gitoxide -af3608ad397784795c3758a1ac99ec6a367de9be not-for-merge branch 'walk-with-commitgraph' of https://github.com/Byron/gitoxide +$head_id branch 'main' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'faster-discovery' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'fix-823' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'fix-bare-with-index' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'gix-archive' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'index-from-files' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'moonwalk' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'release-gix' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'smart-release-without-git2' of https://github.com/Byron/gitoxide +$head_id not-for-merge branch 'walk-with-commitgraph' of https://github.com/Byron/gitoxide EOF git tag t1 diff --git a/gix-ref/tests/fixtures/make_repo_for_1850_repro.sh b/gix-ref/tests/fixtures/make_repo_for_1850_repro.sh index fee6ec02e81..0778f153eb1 100755 --- a/gix-ref/tests/fixtures/make_repo_for_1850_repro.sh +++ b/gix-ref/tests/fixtures/make_repo_for_1850_repro.sh @@ -2,16 +2,28 @@ set -eu -o pipefail git init 
-q +hex_len=40 +hash_kind=$(git rev-parse --show-object-format) +if test "$hash_kind" = "sha256"; then + hex_len=64 +fi +oid () { + if test "$hash_kind" = "sha1"; then + printf "%s" "$2" + else + printf "%0.s$1" $(seq 1 "$hex_len") + fi +} cat <.git/packed-refs # pack-refs with: peeled fully-peeled sorted -17dad46c0ce3be4d4b6d45def031437ab2e40666 refs/heads/ig-branch-remote -83a70366fcc1255d35a00102138293bac673b331 refs/heads/ig-inttest -21b57230833a1733f6685e14eabe936a09689a1b refs/heads/ig-pr4021 -d773228d0ee0012fcca53fffe581b0fce0b1dc56 refs/heads/ig/aliases -ba37abe04f91fec76a6b9a817d40ee2daec47207 refs/heads/ig/cifail +$(oid 1 17dad46c0ce3be4d4b6d45def031437ab2e40666) refs/heads/ig-branch-remote +$(oid 2 83a70366fcc1255d35a00102138293bac673b331) refs/heads/ig-inttest +$(oid 3 3333333333333333333333333333333333333333) refs/heads/ig-pr4021 +$(oid 4 d773228d0ee0012fcca53fffe581b0fce0b1dc56) refs/heads/ig/aliases +$(oid 5 ba37abe04f91fec76a6b9a817d40ee2daec47207) refs/heads/ig/cifail EOF mkdir -p .git/refs/heads/ig/pr -echo d22f46f3d7d2504d56c573b5fe54919bd16be48a >.git/refs/heads/ig/push-name -echo 4dec145966c546402c5a9e28b932e7c8c939e01e >.git/refs/heads/ig-pr4021 +oid 6 d22f46f3d7d2504d56c573b5fe54919bd16be48a >.git/refs/heads/ig/push-name +oid 7 4dec145966c546402c5a9e28b932e7c8c939e01e >.git/refs/heads/ig-pr4021 diff --git a/gix-ref/tests/fixtures/make_repo_for_1928_repro.sh b/gix-ref/tests/fixtures/make_repo_for_1928_repro.sh index 445f4107f99..896ded235bf 100755 --- a/gix-ref/tests/fixtures/make_repo_for_1928_repro.sh +++ b/gix-ref/tests/fixtures/make_repo_for_1928_repro.sh @@ -2,16 +2,23 @@ set -eu -o pipefail git init -q +hex_len=40 +if test "$(git rev-parse --show-object-format)" = "sha256"; then + hex_len=64 +fi +oid () { + printf "%0.s$1" $(seq 1 "$hex_len") +} mkdir -p .git/refs/heads/a cat <.git/packed-refs # pack-refs with: peeled fully-peeled sorted -1111111111111111111111111111111111111111 refs/heads/a- -2222222222222222222222222222222222222222 
refs/heads/a/b -3333333333333333333333333333333333333333 refs/heads/a0 +$(oid 1) refs/heads/a- +$(oid 2) refs/heads/a/b +$(oid 3) refs/heads/a0 EOF mkdir -p .git/refs/heads/a -echo aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >.git/refs/heads/a- -echo bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb >.git/refs/heads/a/b -echo cccccccccccccccccccccccccccccccccccccccc >.git/refs/heads/a0 +oid a >.git/refs/heads/a- +oid b >.git/refs/heads/a/b +oid c >.git/refs/heads/a0 diff --git a/gix-ref/tests/fixtures/make_worktree_repo.sh b/gix-ref/tests/fixtures/make_worktree_repo.sh index 960869ebb40..24ee285c6f8 100755 --- a/gix-ref/tests/fixtures/make_worktree_repo.sh +++ b/gix-ref/tests/fixtures/make_worktree_repo.sh @@ -19,6 +19,8 @@ git update-ref refs/stacks/common :/c1 git worktree add ../w-detached HEAD~1 +printf "gitdir: ../repo/.git/worktrees/w-detached\n" >../w-detached/.git +printf "../../../../w-detached/.git\n" >.git/worktrees/w-detached/gitdir ( cd ../w-detached git bisect start @@ -28,6 +30,8 @@ git worktree add ../w-detached HEAD~1 ) git worktree add ../w1 +printf "gitdir: ../repo/.git/worktrees/w1\n" >../w1/.git +printf "../../../../w1/.git\n" >.git/worktrees/w1/gitdir ( cd ../w1 git reset --hard HEAD~2 diff --git a/gix-ref/tests/refs/file/log.rs b/gix-ref/tests/refs/file/log.rs index 697d18e2210..2ce20dbf535 100644 --- a/gix-ref/tests/refs/file/log.rs +++ b/gix-ref/tests/refs/file/log.rs @@ -109,7 +109,7 @@ mod iter { mod with_buffer_big_enough_for_largest_line { use gix_ref::log::Line; - use crate::{file::log::iter::reflog, hex_to_id}; + use crate::{file::log::iter::reflog, sha1_hex_to_id}; #[test] fn single_line() -> crate::Result { @@ -129,8 +129,8 @@ mod iter { signature: _, message, } = iter.next().expect("a single line")?; - assert_eq!(previous_oid, hex_to_id("0000000000000000000000000000000000000000")); - assert_eq!(new_oid, hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03")); + assert_eq!(previous_oid, 
sha1_hex_to_id("0000000000000000000000000000000000000000")); + assert_eq!(new_oid, sha1_hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03")); assert_eq!(message, "commit (initial): c1"); assert!(iter.next().is_none(), "iterator depleted"); } @@ -157,8 +157,8 @@ mod iter { signature: _, message, } = iter.next().expect("a single line")?; - assert_eq!(previous_oid, hex_to_id("0000000000000000000000000000000000000000")); - assert_eq!(new_oid, hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03")); + assert_eq!(previous_oid, sha1_hex_to_id("0000000000000000000000000000000000000000")); + assert_eq!(new_oid, sha1_hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03")); assert_eq!(message, "commit (initial): c1"); let Line { previous_oid, @@ -167,8 +167,8 @@ mod iter { message, } = iter.next().expect("a single line")?; assert_eq!(message, "commit (initial): c2"); - assert_eq!(previous_oid, hex_to_id("1000000000000000000000000000000000000000")); - assert_eq!(new_oid, hex_to_id("234385f6d781b7e97062102c6a483440bfda2a03")); + assert_eq!(previous_oid, sha1_hex_to_id("1000000000000000000000000000000000000000")); + assert_eq!(new_oid, sha1_hex_to_id("234385f6d781b7e97062102c6a483440bfda2a03")); assert!(iter.next().is_none(), "iterator depleted"); } } @@ -197,7 +197,7 @@ mod iter { mod forward { use gix_object::bstr::B; - use crate::file::log::iter::reflog; + use crate::{file::log::iter::reflog, hex_to_id}; #[test] fn all_success() -> crate::Result { @@ -207,8 +207,8 @@ mod iter { let mut iter = gix_ref::file::log::iter::forward(&log); let line = iter.next().unwrap()?; - assert_eq!(line.previous_oid(), gix_hash::Kind::Sha1.null()); - assert_eq!(line.new_oid, B("134385f6d781b7e97062102c6a483440bfda2a03")); + assert_eq!(line.previous_oid(), crate::fixture_hash_kind().null()); + assert_eq!(line.new_oid(), hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03")); assert_eq!(line.message, B("commit (initial): c1")); assert!(iter.all(|l| l.is_ok()), "all lines parse fine"); Ok(()) diff 
--git a/gix-ref/tests/refs/file/mod.rs b/gix-ref/tests/refs/file/mod.rs index f9087dd3a6b..4dc5a95b275 100644 --- a/gix-ref/tests/refs/file/mod.rs +++ b/gix-ref/tests/refs/file/mod.rs @@ -19,18 +19,36 @@ pub fn store_at(name: &str) -> crate::Result { pub fn named_store_at(script_name: &str, name: &str) -> crate::Result { let path = gix_testtools::scripted_fixture_read_only_standalone(script_name)?; - Ok(Store::at(path.join(name).join(".git"), Default::default())) + Ok(Store::at(path.join(name).join(".git"), store_options())) } pub fn store_at_with_args(name: &str, args: impl IntoIterator>) -> crate::Result { let path = gix_testtools::scripted_fixture_read_only_with_args_standalone(name, args)?; - Ok(Store::at(path.join(".git"), Default::default())) + Ok(Store::at(path.join(".git"), store_options())) } fn store_writable(name: &str) -> crate::Result<(gix_testtools::tempfile::TempDir, Store)> { let dir = gix_testtools::scripted_fixture_writable_standalone(name)?; let git_dir = dir.path().join(".git"); - Ok((dir, Store::at(git_dir, Default::default()))) + Ok((dir, Store::at(git_dir, store_options()))) +} + +pub fn store_options() -> gix_ref::store::init::Options { + gix_ref::store::init::Options { + object_hash: crate::fixture_hash_kind(), + ..Default::default() + } +} + +pub fn odb_at(objects_dir: impl Into) -> std::io::Result { + gix_odb::at_opts( + objects_dir, + Vec::new(), + gix_odb::store::init::Options { + object_hash: crate::fixture_hash_kind(), + ..Default::default() + }, + ) } struct EmptyCommit; diff --git a/gix-ref/tests/refs/file/reference.rs b/gix-ref/tests/refs/file/reference.rs index 33a83bd57ec..b41e4e623fc 100644 --- a/gix-ref/tests/refs/file/reference.rs +++ b/gix-ref/tests/refs/file/reference.rs @@ -145,7 +145,7 @@ mod peel { "points to a tag object without actual object lookup" ); - let odb = gix_odb::at(store.git_dir().join("objects"))?; + let odb = crate::file::odb_at(store.git_dir().join("objects"))?; let mut r: Reference = 
store.find_loose("dt1")?.into(); assert_eq!(r.peel_to_id(&store, &odb)?, commit, "points to the commit with lookup"); @@ -156,7 +156,7 @@ mod peel { fn to_id_long_jump() -> crate::Result { for packed in [None, Some("packed")] { let store = file::store_at_with_args("make_multi_hop_ref.sh", packed)?; - let odb = gix_odb::at(store.git_dir().join("objects"))?; + let odb = crate::file::odb_at(store.git_dir().join("objects"))?; let mut r: Reference = store.find("multi-hop")?; r.peel_to_id(&store, &odb)?; @@ -215,8 +215,12 @@ mod parse { #[test] fn $name() { use std::convert::TryInto; - let err = - Reference::try_from_path("HEAD".try_into().expect("this is a valid name"), $input).unwrap_err(); + let err = Reference::try_from_path( + "HEAD".try_into().expect("this is a valid name"), + $input, + gix_hash::Kind::Sha1, + ) + .unwrap_err(); assert_eq!(err.to_string(), $err); } }; @@ -224,20 +228,34 @@ mod parse { mktest!(hex_id, b"foobar", "\"foobar\" could not be parsed"); mktest!(ref_tag, b"reff: hello", "\"reff: hello\" could not be parsed"); + mktest!( + sha256_sized_id_for_sha1, + b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n", + "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\n\" could not be parsed" + ); + mktest!( + trailing_garbage_after_id, + b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaextra", + "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaextra\" could not be parsed" + ); } mod valid { use gix_object::bstr::ByteSlice; use gix_ref::file::loose::Reference; - use crate::hex_to_id; + use crate::sha1_hex_to_id; macro_rules! 
mktest { ($name:ident, $input:literal, $kind:path, $id:expr, $ref:expr) => { #[test] fn $name() { use std::convert::TryInto; - let reference = - Reference::try_from_path("HEAD".try_into().expect("valid static name"), $input).unwrap(); + let reference = Reference::try_from_path( + "HEAD".try_into().expect("valid static name"), + $input, + gix_hash::Kind::Sha1, + ) + .unwrap(); assert_eq!(reference.kind(), $kind); assert_eq!(reference.target.to_ref().try_id(), $id); assert_eq!( @@ -252,7 +270,15 @@ mod parse { peeled, b"c5241b835b93af497cda80ce0dceb8f49800df1c\n", gix_ref::Kind::Object, - Some(hex_to_id("c5241b835b93af497cda80ce0dceb8f49800df1c").as_ref()), + Some(sha1_hex_to_id("c5241b835b93af497cda80ce0dceb8f49800df1c").as_ref()), + None + ); + + mktest!( + peeled_uppercase, + b"C5241B835B93AF497CDA80CE0DCEB8F49800DF1C\n", + gix_ref::Kind::Object, + Some(sha1_hex_to_id("c5241b835b93af497cda80ce0dceb8f49800df1c").as_ref()), None ); @@ -271,5 +297,22 @@ mod parse { None, Some(b"refs/foobar".as_bstr()) ); + + #[test] + fn peeled_sha256() { + use std::convert::TryInto; + + let input = &b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"[..]; + let reference = Reference::try_from_path( + "HEAD".try_into().expect("valid static name"), + input, + gix_hash::Kind::Sha256, + ) + .unwrap(); + assert_eq!(reference.kind(), gix_ref::Kind::Object); + let target_id = reference.target.to_ref().try_id().expect("non-symbolic").to_owned(); + assert_eq!(target_id.kind(), gix_hash::Kind::Sha256); + assert_eq!(target_id, gix_hash::ObjectId::from_hex(input).unwrap()); + } } } diff --git a/gix-ref/tests/refs/file/store/find.rs b/gix-ref/tests/refs/file/store/find.rs index 2ecd52cddd9..51ea5472ab4 100644 --- a/gix-ref/tests/refs/file/store/find.rs +++ b/gix-ref/tests/refs/file/store/find.rs @@ -145,7 +145,7 @@ mod loose { let store = store()?; assert_eq!( store.find_loose("FETCH_HEAD")?.target.id(), - hex_to_id("9064ea31fae4dc59a56bdd3a06c0ddc990ee689e"), + 
hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"), "despite being special, we are able to read the first commit out of a typical FETCH_HEAD" ); Ok(()) diff --git a/gix-ref/tests/refs/file/store/iter.rs b/gix-ref/tests/refs/file/store/iter.rs index 91639672e6e..7067fb48699 100644 --- a/gix-ref/tests/refs/file/store/iter.rs +++ b/gix-ref/tests/refs/file/store/iter.rs @@ -448,46 +448,89 @@ fn overlay_iter_reproduce_1850() -> crate::Result { .all()? .map(|r| r.map(|r| (r.name.as_bstr().to_owned(), r.target))) .collect::, _>>()?; - insta::assert_debug_snapshot!(ref_names, @r#" - [ - ( - "refs/heads/ig-branch-remote", - Object( - Sha1(17dad46c0ce3be4d4b6d45def031437ab2e40666), + if crate::fixture_hash_kind() != gix_hash::Kind::Sha1 { + insta::assert_debug_snapshot!(ref_names, @r#" + [ + ( + "refs/heads/ig-branch-remote", + Object( + Sha256(1111111111111111111111111111111111111111111111111111111111111111), + ), ), - ), - ( - "refs/heads/ig-inttest", - Object( - Sha1(83a70366fcc1255d35a00102138293bac673b331), + ( + "refs/heads/ig-inttest", + Object( + Sha256(2222222222222222222222222222222222222222222222222222222222222222), + ), ), - ), - ( - "refs/heads/ig-pr4021", - Object( - Sha1(4dec145966c546402c5a9e28b932e7c8c939e01e), + ( + "refs/heads/ig-pr4021", + Object( + Sha256(7777777777777777777777777777777777777777777777777777777777777777), + ), ), - ), - ( - "refs/heads/ig/aliases", - Object( - Sha1(d773228d0ee0012fcca53fffe581b0fce0b1dc56), + ( + "refs/heads/ig/aliases", + Object( + Sha256(4444444444444444444444444444444444444444444444444444444444444444), + ), ), - ), - ( - "refs/heads/ig/cifail", - Object( - Sha1(ba37abe04f91fec76a6b9a817d40ee2daec47207), + ( + "refs/heads/ig/cifail", + Object( + Sha256(5555555555555555555555555555555555555555555555555555555555555555), + ), ), - ), - ( - "refs/heads/ig/push-name", - Object( - Sha1(d22f46f3d7d2504d56c573b5fe54919bd16be48a), + ( + "refs/heads/ig/push-name", + Object( + 
Sha256(6666666666666666666666666666666666666666666666666666666666666666), + ), ), - ), - ] - "#); + ] + "#); + } else { + insta::assert_debug_snapshot!(ref_names, @r#" + [ + ( + "refs/heads/ig-branch-remote", + Object( + Sha1(17dad46c0ce3be4d4b6d45def031437ab2e40666), + ), + ), + ( + "refs/heads/ig-inttest", + Object( + Sha1(83a70366fcc1255d35a00102138293bac673b331), + ), + ), + ( + "refs/heads/ig-pr4021", + Object( + Sha1(4dec145966c546402c5a9e28b932e7c8c939e01e), + ), + ), + ( + "refs/heads/ig/aliases", + Object( + Sha1(d773228d0ee0012fcca53fffe581b0fce0b1dc56), + ), + ), + ( + "refs/heads/ig/cifail", + Object( + Sha1(ba37abe04f91fec76a6b9a817d40ee2daec47207), + ), + ), + ( + "refs/heads/ig/push-name", + Object( + Sha1(d22f46f3d7d2504d56c573b5fe54919bd16be48a), + ), + ), + ] + "#); + } Ok(()) } @@ -499,6 +542,13 @@ fn overlay_iter_reproduce_1928() -> crate::Result { .all()? .map(|r| r.map(|r| (r.name.as_bstr().to_owned(), r.target))) .collect::, _>>()?; + if crate::fixture_hash_kind() != gix_hash::Kind::Sha1 { + assert_eq!( + ref_names.iter().map(|(name, _)| name.to_string()).collect::>(), + vec!["refs/heads/a-", "refs/heads/a/b", "refs/heads/a0"] + ); + return Ok(()); + } insta::assert_debug_snapshot!(ref_names, @r#" [ ( diff --git a/gix-ref/tests/refs/file/store/mod.rs b/gix-ref/tests/refs/file/store/mod.rs index 7262038e7ae..549d1d2ff22 100644 --- a/gix-ref/tests/refs/file/store/mod.rs +++ b/gix-ref/tests/refs/file/store/mod.rs @@ -29,6 +29,7 @@ fn precompose_unicode_journey() -> crate::Result { root, gix_ref::store::init::Options { write_reflog: WriteReflog::Always, + object_hash: crate::fixture_hash_kind(), precompose_unicode: false, ..Default::default() }, @@ -57,6 +58,7 @@ fn precompose_unicode_journey() -> crate::Result { tmp.path().join(precomposed_a), // it's important that root paths are also precomposed then. 
gix_ref::store::init::Options { write_reflog: WriteReflog::Always, + object_hash: crate::fixture_hash_kind(), precompose_unicode: true, ..Default::default() }, diff --git a/gix-ref/tests/refs/file/store/reflog.rs b/gix-ref/tests/refs/file/store/reflog.rs index a0a2ad3b124..e47808522f5 100644 --- a/gix-ref/tests/refs/file/store/reflog.rs +++ b/gix-ref/tests/refs/file/store/reflog.rs @@ -3,6 +3,7 @@ fn store() -> crate::Result { gix_testtools::scripted_fixture_read_only_standalone("make_repo_for_reflog.sh")?.join(".git"), gix_ref::store::init::Options { write_reflog: gix_ref::store::WriteReflog::Disable, + object_hash: crate::fixture_hash_kind(), ..Default::default() }, )) diff --git a/gix-ref/tests/refs/file/transaction/mod.rs b/gix-ref/tests/refs/file/transaction/mod.rs index efb719f6534..85f3fd362fa 100644 --- a/gix-ref/tests/refs/file/transaction/mod.rs +++ b/gix-ref/tests/refs/file/transaction/mod.rs @@ -21,7 +21,7 @@ pub(crate) mod prepare_and_commit { pub(crate) fn empty_store() -> crate::Result<(gix_testtools::tempfile::TempDir, file::Store)> { let dir = gix_testtools::tempfile::TempDir::new().unwrap(); - let store = file::Store::at(dir.path().into(), Default::default()); + let store = file::Store::at(dir.path().into(), crate::file::store_options()); Ok((dir, store)) } diff --git a/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/collisions.rs b/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/collisions.rs index 4bfd3ca9d60..bdb1a8cc1e2 100644 --- a/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/collisions.rs +++ b/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/collisions.rs @@ -119,6 +119,7 @@ fn conflicting_creation_into_packed_refs() -> crate::Result { // The following works because locks aren't actually obtained if there would be no change. 
// Otherwise there would be a conflict on case-insensitive filesystems + let null = crate::fixture_hash_kind().null(); store .transaction() .packed_refs(PackedRefs::DeletionsAndNonSymbolicUpdatesRemoveLooseSourceReference( @@ -130,7 +131,7 @@ fn conflicting_creation_into_packed_refs() -> crate::Result { change: Change::Update { log: LogChange::default(), expected: PreviousValue::Any, - new: Target::Object(gix_hash::Kind::Sha1.null()), + new: Target::Object(null), }, name: "refs/a".try_into().expect("valid"), deref: false, @@ -141,7 +142,7 @@ fn conflicting_creation_into_packed_refs() -> crate::Result { expected: PreviousValue::MustExistAndMatch(Target::Object(hex_to_id( "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", ))), - new: Target::Object(gix_hash::Kind::Sha1.null()), + new: Target::Object(null), }, name: "refs/A".try_into().expect("valid"), deref: false, diff --git a/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/mod.rs b/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/mod.rs index a803c481eab..c1faa0b8f1d 100644 --- a/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/mod.rs +++ b/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/mod.rs @@ -109,7 +109,7 @@ fn reference_with_equally_named_empty_or_non_empty_directory_already_in_place_ca fn reference_with_old_value_must_exist_when_creating_it() -> crate::Result { let (_keep, store) = empty_store()?; - let new_target = Target::Object(gix_hash::Kind::Sha1.null()); + let new_target = Target::Object(crate::fixture_hash_kind().null()); let res = store.transaction().prepare( Some(RefEdit { change: Change::Update { @@ -144,7 +144,7 @@ fn reference_with_explicit_value_must_match_the_value_on_update() -> crate::Resu Some(RefEdit { change: Change::Update { log: LogChange::default(), - new: Target::Object(gix_hash::Kind::Sha1.null()), + new: Target::Object(crate::fixture_hash_kind().null()), expected: 
PreviousValue::MustExistAndMatch(Target::Object(hex_to_id( "28ce6a8b26aa170e1de65536fe8abe1832bd3242", ))), @@ -168,7 +168,7 @@ fn reference_with_explicit_value_must_match_the_value_on_update() -> crate::Resu #[test] fn the_existing_must_match_constraint_allow_non_existing_references_to_be_created() -> crate::Result { let (_keep, store) = store_writable("make_repo_for_reflog.sh")?; - let expected = PreviousValue::ExistingMustMatch(Target::Object(ObjectId::empty_tree(gix_hash::Kind::Sha1))); + let expected = PreviousValue::ExistingMustMatch(Target::Object(ObjectId::empty_tree(crate::fixture_hash_kind()))); let mut buf = TimeBuf::default(); let edits = store .transaction() @@ -176,7 +176,7 @@ fn the_existing_must_match_constraint_allow_non_existing_references_to_be_create Some(RefEdit { change: Change::Update { log: LogChange::default(), - new: Target::Object(gix_hash::Kind::Sha1.null()), + new: Target::Object(crate::fixture_hash_kind().null()), expected: expected.clone(), }, name: "refs/heads/new".try_into()?, @@ -192,7 +192,7 @@ fn the_existing_must_match_constraint_allow_non_existing_references_to_be_create vec![RefEdit { change: Change::Update { log: LogChange::default(), - new: Target::Object(gix_hash::Kind::Sha1.null()), + new: Target::Object(crate::fixture_hash_kind().null()), expected, }, name: "refs/heads/new".try_into()?, @@ -213,7 +213,7 @@ fn the_existing_must_match_constraint_requires_existing_references_to_have_the_g Some(RefEdit { change: Change::Update { log: LogChange::default(), - new: Target::Object(gix_hash::Kind::Sha1.null()), + new: Target::Object(crate::fixture_hash_kind().null()), expected: PreviousValue::ExistingMustMatch(Target::Object(hex_to_id( "28ce6a8b26aa170e1de65536fe8abe1832bd3242", ))), @@ -277,7 +277,7 @@ fn reference_with_must_exist_constraint_must_exist_already_with_any_value() -> c let target = head.target; let previous_reflog_count = reflog_lines(&store, "HEAD")?.len(); - let new_target = 
Target::Object(ObjectId::empty_tree(gix_hash::Kind::Sha1)); + let new_target = Target::Object(ObjectId::empty_tree(crate::fixture_hash_kind())); let edits = store .transaction() .prepare( @@ -630,7 +630,7 @@ fn symbolic_head_missing_referent_then_update_referent() -> crate::Result { for ref_name in &["HEAD", referent] { match reflog_writemode { WriteReflog::Normal | WriteReflog::Always => { - let expected_line = log_line(gix_hash::Kind::Sha1.null(), new_oid, "an actual change"); + let expected_line = log_line(crate::fixture_hash_kind().null(), new_oid, "an actual change"); assert_eq!(reflog_lines(&store, ref_name)?, vec![expected_line]); } WriteReflog::Disable => { @@ -775,7 +775,7 @@ fn packed_refs_creation_with_packed_refs_mode_prune_removes_original_loose_refs( store.open_packed_buffer()?.is_none(), "there should be no packed refs to start out with" ); - let odb = gix_odb::at(store.git_dir().join("objects"))?; + let odb = crate::file::odb_at(store.git_dir().join("objects"))?; let edits = store .transaction() .packed_refs(PackedRefs::DeletionsAndNonSymbolicUpdatesRemoveLooseSourceReference( @@ -889,7 +889,7 @@ fn packed_refs_deletion_in_deletions_and_updates_mode() -> crate::Result { store.try_find_loose("refs/heads/d1")?.is_none(), "no loose d1 available, it's packed" ); - let odb = gix_odb::at(store.git_dir().join("objects"))?; + let odb = crate::file::odb_at(store.git_dir().join("objects"))?; let old_id = hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"); let edits = store .transaction() diff --git a/gix-ref/tests/refs/file/transaction/prepare_and_commit/delete.rs b/gix-ref/tests/refs/file/transaction/prepare_and_commit/delete.rs index bb9f1c801da..ffcd6a93bc5 100644 --- a/gix-ref/tests/refs/file/transaction/prepare_and_commit/delete.rs +++ b/gix-ref/tests/refs/file/transaction/prepare_and_commit/delete.rs @@ -127,7 +127,13 @@ fn delete_ref_with_incorrect_previous_value_fails() -> crate::Result { match res { Err(err) => { - assert_eq!(err.to_string(), "The 
reference \"refs/heads/main\" should have content ref: refs/heads/main, actual content was 02a7a22d90d7c02fb494ed25551850b868e634f0"); + assert_eq!( + err.to_string(), + format!( + "The reference \"refs/heads/main\" should have content ref: refs/heads/main, actual content was {}", + hex_to_id("02a7a22d90d7c02fb494ed25551850b868e634f0") + ) + ); } Ok(_) => unreachable!("must be err"), } diff --git a/gix-ref/tests/refs/file/worktree.rs b/gix-ref/tests/refs/file/worktree.rs index 468fcbfebb9..f90550281f1 100644 --- a/gix-ref/tests/refs/file/worktree.rs +++ b/gix-ref/tests/refs/file/worktree.rs @@ -29,8 +29,8 @@ fn main_store( let (dir, tmp) = dir(packed, writable)?; let git_dir = dir.join("repo").join(".git"); Ok(( - gix_ref::file::Store::at(git_dir.clone(), Default::default()), - gix_odb::at(git_dir.join("objects"))?, + gix_ref::file::Store::at(git_dir.clone(), crate::file::store_options()), + crate::file::odb_at(git_dir.join("objects"))?, tmp, )) } @@ -50,8 +50,8 @@ fn worktree_store( .into_repository_and_work_tree_directories(); let common_dir = git_dir.join("../.."); Ok(( - gix_ref::file::Store::for_linked_worktree(git_dir, common_dir.clone(), Default::default()), - gix_odb::at(common_dir.join("objects"))?, + gix_ref::file::Store::for_linked_worktree(git_dir, common_dir.clone(), crate::file::store_options()), + crate::file::odb_at(common_dir.join("objects"))?, tmp, )) } @@ -68,6 +68,10 @@ enum Mode { Write, } +fn oid(hex: &str) -> String { + crate::hex_to_id(hex).to_string() +} + impl From for bool { fn from(v: Mode) -> Self { match v { @@ -202,7 +206,7 @@ mod writable { use crate::{ file::{ transaction::prepare_and_commit::committer, - worktree::{main_store, worktree_store, Mode}, + worktree::{main_store, oid, worktree_store, Mode}, EmptyCommit, }, hex_to_id, @@ -218,10 +222,10 @@ mod writable { #[test] fn main() -> crate::Result { - let new_id_main_str = "11111111111111111162102c6a483440bfda2a03"; - let new_id_main = hex_to_id(new_id_main_str); - let 
new_id_linked_str = "22222222222222222262102c6a483440bfda2a03"; - let new_id_linked = hex_to_id(new_id_linked_str); + let new_id_main = hex_to_id("11111111111111111162102c6a483440bfda2a03"); + let new_id_main_str = new_id_main.to_string(); + let new_id_linked = hex_to_id("22222222222222222262102c6a483440bfda2a03"); + let new_id_linked_str = new_id_linked.to_string(); for packed in [false, true] { let (store, _odb, _tmp) = main_store(packed, Mode::Write)?; @@ -273,20 +277,23 @@ mod writable { .map(|r| (r.name.to_string(), r.target.to_string())) .collect::>(), [ - ("refs/bisect/bad", "9556057aee5abb06912922e9f26c46386a816822"), - ("refs/bisect/good", new_id_main_str), - ("refs/heads/main", "9556057aee5abb06912922e9f26c46386a816822"), - ("refs/heads/new", new_id_main_str), - ("refs/heads/shared", new_id_linked_str), - ("refs/heads/w1", "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), - ("refs/stacks/common", "134385f6d781b7e97062102c6a483440bfda2a03"), - ("refs/stacks/w1", "17d78c64cef6c33a10a604573fd2c429e477fd63"), - ("refs/stacks/wtdetached", "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), - ("refs/tags/dt1", "d3ba65e5e3be5cdd7210da9998307a4762999cc5"), - ("refs/tags/t1", "9556057aee5abb06912922e9f26c46386a816822") + ("refs/bisect/bad", oid("9556057aee5abb06912922e9f26c46386a816822")), + ("refs/bisect/good", new_id_main_str.clone()), + ("refs/heads/main", oid("9556057aee5abb06912922e9f26c46386a816822")), + ("refs/heads/new", new_id_main_str.clone()), + ("refs/heads/shared", new_id_linked_str.clone()), + ("refs/heads/w1", oid("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7")), + ("refs/stacks/common", oid("134385f6d781b7e97062102c6a483440bfda2a03")), + ("refs/stacks/w1", oid("17d78c64cef6c33a10a604573fd2c429e477fd63")), + ( + "refs/stacks/wtdetached", + oid("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7") + ), + ("refs/tags/dt1", oid("d3ba65e5e3be5cdd7210da9998307a4762999cc5")), + ("refs/tags/t1", oid("9556057aee5abb06912922e9f26c46386a816822")) ] .iter() - .map(|(a, b)| 
(a.to_string(), b.to_string())) + .map(|(a, b)| (a.to_string(), b.clone())) .collect::>(), "we traverse only refs of the main worktree" ); @@ -298,12 +305,15 @@ mod writable { .map(|r| (r.name.to_string(), r.target.to_string())) .collect::>(), [ - ("refs/stacks/common", "134385f6d781b7e97062102c6a483440bfda2a03"), - ("refs/stacks/w1", "17d78c64cef6c33a10a604573fd2c429e477fd63"), - ("refs/stacks/wtdetached", "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), + ("refs/stacks/common", oid("134385f6d781b7e97062102c6a483440bfda2a03")), + ("refs/stacks/w1", oid("17d78c64cef6c33a10a604573fd2c429e477fd63")), + ( + "refs/stacks/wtdetached", + oid("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7") + ), ] .iter() - .map(|(a, b)| (a.to_string(), b.to_string())) + .map(|(a, b)| (a.to_string(), b.clone())) .collect::>(), ); @@ -476,10 +486,10 @@ mod writable { #[test] fn linked() -> crate::Result { - let new_id_str = "134385f6d781b7e97062102c6a483440bfda2a03"; - let new_id = hex_to_id(new_id_str); - let new_id_main_str = "22222222222222227062102c6a483440bfda2a03"; - let new_id_main = hex_to_id(new_id_main_str); + let new_id = hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"); + let new_id_str = new_id.to_string(); + let new_id_main = hex_to_id("22222222222222227062102c6a483440bfda2a03"); + let new_id_main_str = new_id_main.to_string(); for packed in [false, true] { let (store, _odb, _tmp) = worktree_store(packed, "w1", Mode::Write)?; @@ -553,21 +563,24 @@ mod writable { .map(|r| (r.name.to_string(), r.target.to_string())) .collect::>(), [ - ("refs/bisect/bad", "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), - ("refs/bisect/good", new_id_str), - ("refs/heads/main", "9556057aee5abb06912922e9f26c46386a816822"), - ("refs/heads/new", new_id_main_str), - ("refs/heads/shared", new_id_str), - ("refs/heads/w1", "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), - ("refs/stacks/common", "134385f6d781b7e97062102c6a483440bfda2a03"), - ("refs/stacks/w1", "17d78c64cef6c33a10a604573fd2c429e477fd63"), - 
("refs/stacks/wtdetached", "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), - ("refs/tags/dt1", "d3ba65e5e3be5cdd7210da9998307a4762999cc5"), - ("refs/tags/t1", "9556057aee5abb06912922e9f26c46386a816822"), - ("refs/worktree/private", new_id_str) + ("refs/bisect/bad", oid("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7")), + ("refs/bisect/good", new_id_str.clone()), + ("refs/heads/main", oid("9556057aee5abb06912922e9f26c46386a816822")), + ("refs/heads/new", new_id_main_str.clone()), + ("refs/heads/shared", new_id_str.clone()), + ("refs/heads/w1", oid("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7")), + ("refs/stacks/common", oid("134385f6d781b7e97062102c6a483440bfda2a03")), + ("refs/stacks/w1", oid("17d78c64cef6c33a10a604573fd2c429e477fd63")), + ( + "refs/stacks/wtdetached", + oid("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7") + ), + ("refs/tags/dt1", oid("d3ba65e5e3be5cdd7210da9998307a4762999cc5")), + ("refs/tags/t1", oid("9556057aee5abb06912922e9f26c46386a816822")), + ("refs/worktree/private", new_id_str.clone()) ] .iter() - .map(|(a, b)| (a.to_string(), b.to_string())) + .map(|(a, b)| (a.to_string(), b.clone())) .collect::>(), "we traverse only refs of the main worktree" ); @@ -579,12 +592,15 @@ mod writable { .map(|r| (r.name.to_string(), r.target.to_string())) .collect::>(), [ - ("refs/stacks/common", "134385f6d781b7e97062102c6a483440bfda2a03"), - ("refs/stacks/w1", "17d78c64cef6c33a10a604573fd2c429e477fd63"), - ("refs/stacks/wtdetached", "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), + ("refs/stacks/common", oid("134385f6d781b7e97062102c6a483440bfda2a03")), + ("refs/stacks/w1", oid("17d78c64cef6c33a10a604573fd2c429e477fd63")), + ( + "refs/stacks/wtdetached", + oid("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7") + ), ] .iter() - .map(|(a, b)| (a.to_string(), b.to_string())) + .map(|(a, b)| (a.to_string(), b.clone())) .collect::>(), ); diff --git a/gix-ref/tests/refs/main.rs b/gix-ref/tests/refs/main.rs index 7ec1b16c266..f97bf0c2ff2 100644 --- a/gix-ref/tests/refs/main.rs +++ 
b/gix-ref/tests/refs/main.rs @@ -1,9 +1,62 @@ use gix_hash::ObjectId; pub fn hex_to_id(hex: &str) -> ObjectId { + match fixture_hash_kind() { + gix_hash::Kind::Sha1 => ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex"), + gix_hash::Kind::Sha256 => { + ObjectId::from_hex(translate_sha1_to_fixture_sha256(hex).as_bytes()).expect("64 bytes hex") + } + _ => unreachable!("tests only support known hash kinds"), + } +} + +pub fn sha1_hex_to_id(hex: &str) -> ObjectId { ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex") } +pub fn fixture_hash_kind() -> gix_hash::Kind { + gix_testtools::hash_kind_from_env().unwrap_or_default() +} + +fn translate_sha1_to_fixture_sha256(hex: &str) -> String { + match hex { + "0000000000000000000000000000000000000000" => { + "0000000000000000000000000000000000000000000000000000000000000000".into() + } + "134385f6d781b7e97062102c6a483440bfda2a03" => { + "5c4c31e0551f0d1fb410b7b9366604b050ea3388b96885063f10ba4c3e2dedd0".into() + } + "4c3f4cce493d7beb45012e478021b5f65295e5a3" => { + "2c309d047b92197ef711ba55ab652c42d36750d6571a3e024a7325e324be3033".into() + } + "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7" => { + "bbaf9640a7404a15394dae2606c5090cb44a722be2167d9d78485779aaf4e065".into() + } + "17d78c64cef6c33a10a604573fd2c429e477fd63" => { + "e47e1df5636110feefb5b858c346dbd1c0feebfc37651a238ec5a6300ed2f666".into() + } + "9556057aee5abb06912922e9f26c46386a816822" => { + "9a3e230fc8479e41397b78b9295510e38be525ec05a08c1ceb797547dc93ed4c".into() + } + "d3ba65e5e3be5cdd7210da9998307a4762999cc5" => { + "8aa62135237b610c0e58159f0a0d7a763371ed72dc046dfda6baf1a30ab8511a".into() + } + "b3109a7e51fc593f85b145a76c70ddd1d133fafd" => { + "1ce70f5e127ba939d70e3b1643213b1dbb0dfedc3079f57a73b2d18cd6cc8a02".into() + } + "02a7a22d90d7c02fb494ed25551850b868e634f0" => { + "c87659e8e5d86a499a88a3869342d1367b918aab9675577571fbc2d2ea2a24b8".into() + } + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391" => gix_hash::Kind::Sha256.empty_blob().to_string(), + other => { + 
let oid = ObjectId::from_hex(other.as_bytes()).expect("40 bytes hex"); + let mut hasher = gix_hash::hasher(gix_hash::Kind::Sha256); + hasher.update(oid.as_bytes()); + hasher.try_finalize().expect("sha256 hashing works").to_string() + } + } +} + pub use gix_testtools::Result; mod file; diff --git a/gix-ref/tests/refs/packed/find.rs b/gix-ref/tests/refs/packed/find.rs index 1502b58f06c..31b4aabb132 100644 --- a/gix-ref/tests/refs/packed/find.rs +++ b/gix-ref/tests/refs/packed/find.rs @@ -3,9 +3,12 @@ use gix_testtools::fixture_path_standalone; use crate::{ file::{store_at, store_with_packed_refs}, + hex_to_id, packed::write_packed_refs_with, }; +const HASH_KIND: gix_hash::Kind = gix_hash::Kind::Sha1; + #[test] fn a_lock_file_would_not_be_a_valid_partial_name() { // doesn't really belong here but want to make sure refname validation works as expected. @@ -47,6 +50,7 @@ fn binary_search_a_name_past_the_end_of_the_packed_refs_file() -> crate::Result let packed_refs = packed::Buffer::open( fixture_path_standalone("packed-refs").join("triggers-out-of-bounds"), 32, + HASH_KIND, )?; assert!(packed_refs.try_find("v0.0.1")?.is_none()); Ok(()) @@ -61,7 +65,7 @@ c4cebba92af964f2d126be90b8a6298c4cf84d45 refs/tags/gix-actor-v0.1.0 0b92c8a256ae06c189e3b9c30b646d62ac8f7d10 refs/tags/gix-actor-v0.1.1\n"; let (_keep, path) = write_packed_refs_with(packed_refs)?; - let buf = packed::Buffer::open(path, 1024)?; + let buf = packed::Buffer::open(path, 1024, HASH_KIND)?; let name = "refs/tags/TEST-0.0.1"; assert_eq!( buf.try_find(name)?.expect("reference exists"), @@ -140,12 +144,14 @@ fn partial_name_to_full_name_conversion_rules_are_applied() -> crate::Result { "refs/remotes/origin/main", "more specification is possible, too" ); + let target = hex_to_id("b3109a7e51fc593f85b145a76c70ddd1d133fafd").to_string(); + let object = hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03").to_string(); assert_eq!( packed.try_find("tag-object")?.expect("present"), packed::Reference { name: 
"refs/tags/tag-object".try_into()?, - target: "b3109a7e51fc593f85b145a76c70ddd1d133fafd".into(), - object: Some("134385f6d781b7e97062102c6a483440bfda2a03".into()) + target: target.as_str().into(), + object: Some(object.as_str().into()) }, "tag objects aren't special, but lets test a little more" ); @@ -161,7 +167,7 @@ bogus refs/tags/gix-actor-v0.1.0 0b92c8a256ae06c189e3b9c30b646d62ac8f7d10 refs/tags/gix-actor-v0.1.1\n"; let (_keep, path) = write_packed_refs_with(broken_packed_refs)?; - let buf = packed::Buffer::open(path, 1024)?; + let buf = packed::Buffer::open(path, 1024, HASH_KIND)?; let name = "refs/tags/gix-actor-v0.1.1"; assert_eq!( diff --git a/gix-ref/tests/refs/packed/iter.rs b/gix-ref/tests/refs/packed/iter.rs index 78db809721a..aaa5db53b06 100644 --- a/gix-ref/tests/refs/packed/iter.rs +++ b/gix-ref/tests/refs/packed/iter.rs @@ -3,10 +3,12 @@ use gix_ref::packed; use crate::file::{store_at, store_with_packed_refs}; +const HASH_KIND: gix_hash::Kind = gix_hash::Kind::Sha1; + #[test] fn empty() -> crate::Result { assert_eq!( - packed::Iter::new(&[])?.count(), + packed::Iter::new(&[], HASH_KIND)?.count(), 0, "empty buffers are fine and lead to no line returned" ); @@ -17,7 +19,7 @@ fn empty() -> crate::Result { fn packed_refs_with_header() -> crate::Result { let dir = gix_testtools::scripted_fixture_read_only_standalone("make_packed_ref_repository.sh")?; let buf = std::fs::read(dir.join(".git").join("packed-refs"))?; - let iter = packed::Iter::new(&buf)?; + let iter = packed::Iter::new(&buf, crate::fixture_hash_kind())?; assert_eq!(iter.count(), 11, "it finds the right amount of items"); Ok(()) } @@ -86,7 +88,7 @@ c4cebba92af964f2d126be90b8a6298c4cf84d45 refs/tags/gix-actor-v0.1.0 ^13da90b54699a6b500ec5cd7d175f2cd5a1bed06 0b92c8a256ae06c189e3b9c30b646d62ac8f7d10 refs/tags/gix-actor-v0.1.1\n"; assert_eq!( - packed::Iter::new(packed_refs)?.collect::, _>>()?, + packed::Iter::new(packed_refs, HASH_KIND)?.collect::, _>>()?, vec![ packed::Reference { name: 
"refs/tags/TEST-0.0.1".try_into()?, @@ -114,7 +116,7 @@ fn broken_ref_doesnt_end_the_iteration() -> crate::Result { buggy-hash refs/wrong ^buggy-hash-too 0b92c8a256ae06c189e3b9c30b646d62ac8f7d10 refs/tags/gix-actor-v0.1.1\n"; - let mut iter = packed::Iter::new(packed_refs)?; + let mut iter = packed::Iter::new(packed_refs, HASH_KIND)?; assert!(iter.next().expect("first ref").is_ok(), "first line is valid"); assert_eq!( diff --git a/gix-ref/tests/refs/packed/open.rs b/gix-ref/tests/refs/packed/open.rs index 6371294f96d..f1dc4846301 100644 --- a/gix-ref/tests/refs/packed/open.rs +++ b/gix-ref/tests/refs/packed/open.rs @@ -4,6 +4,8 @@ use gix_testtools::fixture_path_standalone; use crate::{file::store_with_packed_refs, packed::write_packed_refs_with}; +const HASH_KIND: gix_hash::Kind = gix_hash::Kind::Sha1; + #[test] fn sorted_buffer_works() { let store = store_with_packed_refs().unwrap(); @@ -13,7 +15,7 @@ fn sorted_buffer_works() { #[test] fn empty_buffers_should_not_exist_but_are_fine_to_open() -> crate::Result { let (_keep, path) = write_packed_refs_with(&[])?; - assert_eq!(gix_ref::packed::Buffer::open(path, 512)?.iter()?.count(), 0); + assert_eq!(gix_ref::packed::Buffer::open(path, 512, HASH_KIND)?.iter()?.count(), 0); Ok(()) } @@ -23,6 +25,7 @@ fn unsorted_buffers_or_those_without_a_header_can_be_opened_and_searched() { let buffer = gix_ref::packed::Buffer::open( fixture_path_standalone(Path::new("packed-refs").join(fixture).to_str().expect("utf8")), cutoff, + HASH_KIND, ) .unwrap(); for packed_ref in buffer.iter().unwrap().map(Result::unwrap) { @@ -42,7 +45,7 @@ fn bogus_content_triggers_an_error() -> crate::Result { let packed_refs_data = b"starts with a bogus record, not a header anyway"; let (_keep, path) = write_packed_refs_with(packed_refs_data)?; - match gix_ref::packed::Buffer::open(path, 32) { + match gix_ref::packed::Buffer::open(path, 32, HASH_KIND) { Ok(_) => unreachable!("unsorted buffers can't be opened"), Err(err) => assert_eq!( err.to_string(), 
diff --git a/gix-ref/tests/refs/store.rs b/gix-ref/tests/refs/store.rs index 9f6ae14b7d9..62da472f92a 100644 --- a/gix-ref/tests/refs/store.rs +++ b/gix-ref/tests/refs/store.rs @@ -7,7 +7,7 @@ fn is_send_and_sync() { path.join(".git"), gix_ref::store::init::Options { write_reflog: gix_ref::store::WriteReflog::Normal, - object_hash: gix_hash::Kind::Sha1, + object_hash: crate::fixture_hash_kind(), ..Default::default() }, )) diff --git a/gix-revwalk/src/graph/commit.rs b/gix-revwalk/src/graph/commit.rs index 390dfa84d63..4d8c5b0494e 100644 --- a/gix-revwalk/src/graph/commit.rs +++ b/gix-revwalk/src/graph/commit.rs @@ -8,7 +8,7 @@ impl<'graph, 'cache> LazyCommit<'graph, 'cache> { /// Return an iterator over the parents of this commit. pub fn iter_parents(&self) -> Parents<'graph, 'cache> { let backing = match &self.backing { - Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf)), + Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf, self.hash_kind)), Either::Right((cache, pos)) => Either::Right((*cache, cache.commit_at(*pos).iter_parents())), }; Parents { backing } @@ -20,7 +20,9 @@ impl<'graph, 'cache> LazyCommit<'graph, 'cache> { /// Note that this can only fail if the commit is backed by the object database *and* parsing fails. pub fn committer_timestamp(&self) -> Result { Ok(match &self.backing { - Either::Left(buf) => gix_object::CommitRefIter::from_bytes(buf).committer()?.seconds(), + Either::Left(buf) => gix_object::CommitRefIter::from_bytes(buf, self.hash_kind) + .committer()? 
+ .seconds(), Either::Right((cache, pos)) => cache.commit_at(*pos).committer_timestamp() as SecondsSinceUnixEpoch, // a cast as we cannot represent the error and trying seems overkill }) } @@ -38,7 +40,12 @@ impl<'graph, 'cache> LazyCommit<'graph, 'cache> { &self, ) -> Result<(Option, SecondsSinceUnixEpoch), gix_object::decode::Error> { Ok(match &self.backing { - Either::Left(buf) => (None, gix_object::CommitRefIter::from_bytes(buf).committer()?.seconds()), + Either::Left(buf) => ( + None, + gix_object::CommitRefIter::from_bytes(buf, self.hash_kind) + .committer()? + .seconds(), + ), Either::Right((cache, pos)) => { let commit = cache.commit_at(*pos); ( @@ -57,7 +64,7 @@ impl<'graph, 'cache> LazyCommit<'graph, 'cache> { Ok(match &self.backing { Either::Left(buf) => { use gix_object::commit::ref_iter::Token; - let iter = gix_object::CommitRefIter::from_bytes(buf); + let iter = gix_object::CommitRefIter::from_bytes(buf, self.hash_kind); let mut parents = SmallVec::default(); let mut timestamp = None; for token in iter { diff --git a/gix-revwalk/src/graph/mod.rs b/gix-revwalk/src/graph/mod.rs index a896034c467..691d2ad7cfb 100644 --- a/gix-revwalk/src/graph/mod.rs +++ b/gix-revwalk/src/graph/mod.rs @@ -369,6 +369,7 @@ fn try_lookup<'graph, 'cache>( if let Some(cache) = cache { if let Some(pos) = cache.lookup(id) { return Ok(Some(LazyCommit { + hash_kind: id.kind(), backing: Either::Right((cache, pos)), })); } @@ -380,6 +381,7 @@ fn try_lookup<'graph, 'cache>( .map_err(gix_object::find::existing_iter::Error::Find)? { Some(data) => data.kind.is_commit().then_some(LazyCommit { + hash_kind: data.hash_kind, backing: Either::Left(buf), }), None => None, @@ -439,6 +441,7 @@ where /// /// The owned version of this type is called [`Commit`] and can be obtained by calling [`LazyCommit::to_owned()`]. 
pub struct LazyCommit<'graph, 'cache> { + hash_kind: gix_hash::Kind, backing: Either<&'graph [u8], (&'cache gix_commitgraph::Graph, gix_commitgraph::Position)>, } diff --git a/gix-traverse/src/commit/simple.rs b/gix-traverse/src/commit/simple.rs index 404d947e2e6..d237ce225b6 100644 --- a/gix-traverse/src/commit/simple.rs +++ b/gix-traverse/src/commit/simple.rs @@ -138,6 +138,10 @@ pub(super) struct State { queue: CommitDateQueue, /// Backing storage for the currently yielded commit. buf: Vec, + /// The object hash kind of the currently yielded commit data in `buf`. + /// It's used to know the kind of hash to expect when a new iterator is returned from `buf` + /// via `Simple::commit_iter()`. + object_hash: gix_hash::Kind, /// Set of commits that were already enqueued for the visible traversal, for cycle-checking. seen: gix_hashtable::HashSet, /// Hidden frontier commits that must not be yielded or crossed during traversal. @@ -247,6 +251,7 @@ mod init { next: Default::default(), queue: gix_revwalk::PriorityQueue::new(), buf: vec![], + object_hash: gix_hash::Kind::Sha1, seen: Default::default(), hidden: Default::default(), hidden_tips: Vec::new(), @@ -262,6 +267,7 @@ mod init { next, queue, buf, + object_hash, seen, hidden, hidden_tips, @@ -271,6 +277,7 @@ mod init { next.clear(); queue.clear(); buf.clear(); + *object_hash = gix_hash::Kind::Sha1; seen.clear(); hidden.clear(); hidden_tips.clear(); @@ -464,7 +471,7 @@ mod init { impl Simple { /// Return an iterator for accessing data of the current commit, parsed lazily. pub fn commit_iter(&self) -> CommitRefIter<'_> { - CommitRefIter::from_bytes(self.commit_data()) + CommitRefIter::from_bytes(self.commit_data(), self.state.object_hash) } /// Return the current commits' raw data, which can be parsed using [`gix_object::CommitRef::from_bytes()`]. @@ -519,6 +526,7 @@ mod init { let (commit_time, oid) = match next.pop()? 
{ (Ok(t) | Err(Reverse(t)), o) => (t, o), }; + state.object_hash = oid.kind(); if state.hidden.contains_key(&oid) { continue; } @@ -592,6 +600,7 @@ mod init { loop { let oid = next.pop_front()?; + state.object_hash = oid.kind(); if state.hidden.contains_key(&oid) { continue; } diff --git a/gix/Cargo.toml b/gix/Cargo.toml index f26d560b52a..7fb6c2130e0 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -330,12 +330,6 @@ tracing = ["gix-features/tracing"] ## Enable tracing using the `tracing` crate for detailed tracing. Also enables coarse tracing. tracing-detail = ["gix-features/tracing-detail", "tracing"] -## When parsing objects by default errors will only be available on the granularity of success or failure, and with the above flag enabled -## details information about the error location will be collected. -## Use it in applications which expect broken or invalid objects or for debugging purposes. -## Incorrectly formatted objects aren't very common otherwise. -verbose-object-parsing-errors = ["gix-object/verbose-object-parsing-errors"] - ## Data structures implement `serde::Serialize` and `serde::Deserialize`. serde = [ "dep:serde", diff --git a/gix/src/object/commit.rs b/gix/src/object/commit.rs index c0e6272fec2..8588b828c6d 100644 --- a/gix/src/object/commit.rs +++ b/gix/src/object/commit.rs @@ -86,7 +86,7 @@ impl<'repo> Commit<'repo> { /// # Ok(()) } /// ``` pub fn message_raw(&self) -> Result<&'_ BStr, gix_object::decode::Error> { - gix_object::CommitRefIter::from_bytes(&self.data).message() + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()).message() } /// Obtain the message by using intricate knowledge about the encoding, which is fastest and /// can't fail at the expense of error handling. @@ -114,24 +114,24 @@ impl<'repo> Commit<'repo> { /// used for successive calls to string-ish information to avoid decoding the object /// more than once. 
pub fn decode(&self) -> Result, gix_object::decode::Error> { - gix_object::CommitRef::from_bytes(&self.data) + gix_object::CommitRef::from_bytes(&self.data, self.id.kind()) } /// Return an iterator over tokens, representing this commit piece by piece. pub fn iter(&self) -> gix_object::CommitRefIter<'_> { - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) } /// Return the commits author, with surrounding whitespace trimmed. pub fn author(&self) -> Result, gix_object::decode::Error> { - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) .author() .map(|s| s.trim()) } /// Return the commits committer. with surrounding whitespace trimmed. pub fn committer(&self) -> Result, gix_object::decode::Error> { - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) .committer() .map(|s| s.trim()) } @@ -153,7 +153,7 @@ impl<'repo> Commit<'repo> { pub fn parent_ids(&self) -> impl Iterator> + '_ { use crate::ext::ObjectIdExt; let repo = self.repo; - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) .parent_ids() .map(move |id| id.attach(repo)) } @@ -181,7 +181,7 @@ impl<'repo> Commit<'repo> { /// Parse the commit and return the tree id it points to. 
pub fn tree_id(&self) -> Result, gix_object::decode::Error> { - gix_object::CommitRefIter::from_bytes(&self.data) + gix_object::CommitRefIter::from_bytes(&self.data, self.id.kind()) .tree_id() .map(|id| crate::Id::from_id(id, self.repo)) } @@ -217,7 +217,7 @@ impl<'repo> Commit<'repo> { &self, ) -> Result, gix_object::commit::SignedData<'_>)>, gix_object::decode::Error> { - gix_object::CommitRefIter::signature(&self.data) + gix_object::CommitRefIter::signature(&self.data, self.id.kind()) } } diff --git a/gix/src/object/tag.rs b/gix/src/object/tag.rs index 1a7a3de86c0..6ae6363a3a6 100644 --- a/gix/src/object/tag.rs +++ b/gix/src/object/tag.rs @@ -9,19 +9,19 @@ impl<'repo> Tag<'repo> { /// used for successive calls to string-ish information to avoid decoding the object /// more than once. pub fn decode(&self) -> Result, gix_object::decode::Error> { - gix_object::TagRef::from_bytes(&self.data) + gix_object::TagRef::from_bytes(&self.data, self.id.kind()) } /// Decode this tag partially and return the id of its target. pub fn target_id(&self) -> Result, gix_object::decode::Error> { - gix_object::TagRefIter::from_bytes(&self.data) + gix_object::TagRefIter::from_bytes(&self.data, self.id.kind()) .target_id() .map(|id| id.attach(self.repo)) } /// Decode this tag partially and return the tagger, if the field exists. 
pub fn tagger(&self) -> Result>, gix_object::decode::Error> { - gix_object::TagRefIter::from_bytes(&self.data).tagger() + gix_object::TagRefIter::from_bytes(&self.data, self.id.kind()).tagger() } } diff --git a/justfile b/justfile index 0c247e8a4d9..e4acd89388d 100755 --- a/justfile +++ b/justfile @@ -75,8 +75,8 @@ check: cargo check -p gix-hashtable 2>&1 >/dev/null | grep 'Please set either the `sha1` or the `sha256` feature flag' cargo check -p gix-hashtable --features sha1 cargo check -p gix-object --all-features - cargo check -p gix-object --features verbose-object-parsing-errors 2>&1 >/dev/null | grep 'Please set either the `sha1` or the `sha256` feature flag' - cargo check -p gix-object --features sha1,verbose-object-parsing-errors + cargo check -p gix-object 2>&1 >/dev/null | grep 'Please set either the `sha1` or the `sha256` feature flag' + cargo check -p gix-object --features sha1 cargo check -p gix-archive 2>&1 >/dev/null | grep 'Please set either the `sha1` or the `sha256` feature flag' cargo check -p gix-archive --features sha1 cargo check -p gix-attributes --features serde @@ -215,11 +215,12 @@ unit-tests: env GIX_TEST_FIXTURE_HASH=sha256 cargo nextest run -p gix-commitgraph --no-fail-fast env GIX_TEST_FIXTURE_HASH=sha1 cargo nextest run -p gix-object --no-fail-fast env GIX_TEST_FIXTURE_HASH=sha256 cargo nextest run -p gix-object --no-fail-fast - env GIX_TEST_FIXTURE_HASH=sha1 cargo nextest run -p gix-object --features verbose-object-parsing-errors --no-fail-fast - env GIX_TEST_FIXTURE_HASH=sha256 cargo nextest run -p gix-object --features verbose-object-parsing-errors --no-fail-fast + env GIX_TEST_FIXTURE_HASH=sha1 cargo nextest run -p gix-object --no-fail-fast + env GIX_TEST_FIXTURE_HASH=sha256 cargo nextest run -p gix-object --no-fail-fast cargo nextest run -p gix-tempfile --features signals --no-fail-fast cargo nextest run -p gix-features --all-features --no-fail-fast - cargo nextest run -p gix-ref-tests --all-features --no-fail-fast + env 
GIX_TEST_FIXTURE_HASH=sha1 cargo nextest run -p gix-ref-tests --all-features --no-fail-fast + env GIX_TEST_FIXTURE_HASH=sha256 cargo nextest run -p gix-ref-tests --all-features --no-fail-fast cargo nextest run -p gix-odb --all-features --no-fail-fast cargo nextest run -p gix-odb-tests --features gix-features-parallel --no-fail-fast env GIX_TEST_FIXTURE_HASH=sha1 cargo nextest run -p gix-pack --all-features --no-fail-fast diff --git a/tests/tools/Cargo.toml b/tests/tools/Cargo.toml index bb5ef4e3f23..a9719b8b817 100644 --- a/tests/tools/Cargo.toml +++ b/tests/tools/Cargo.toml @@ -32,7 +32,6 @@ gix-worktree = { version = "^0.51.0", path = "../../gix-worktree" } gix-fs = { version = "^0.20.0", path = "../../gix-fs" } gix-tempfile = { version = "^22.0.0", path = "../../gix-tempfile", default-features = false, features = ["signals"] } -winnow = { version = "1.0.0", features = ["simd"] } fastrand = "2.0.0" bstr = { version = "1.12.0", default-features = false } crc = "3.4.0" diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs index 2efdd5f0648..ffbf65873bc 100644 --- a/tests/tools/src/lib.rs +++ b/tests/tools/src/lib.rs @@ -1607,14 +1607,6 @@ fn extract_archive( Ok((archive_identity, platform)) } -/// Transform a verbose parser errors from raw bytes into a `BStr` to make printing/debugging human-readable. -pub fn to_bstr_err( - err: winnow::error::ErrMode>, -) -> winnow::error::TreeError<&winnow::stream::BStr, winnow::error::StrContext> { - let err = err.into_inner().expect("not a streaming parser"); - err.map_input(winnow::stream::BStr::new) -} - fn family_name() -> &'static str { if cfg!(windows) { "windows"