diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 21db6b6374e..26d4755da19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -391,7 +391,7 @@ jobs: - name: features of gix-features run: | set +x - for feature in progress parallel io-pipe crc32 zlib zlib-rust-backend fast-sha1 rustsha1 cache-efficiency-debug; do + for feature in progress parallel io-pipe crc32 zlib zlib-rust-backend cache-efficiency-debug; do (cd gix-features && cargo build --features "$feature" --target "$TARGET") done - name: crates with 'wasm' feature diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8de670112b9..0a2801a6e78 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -137,8 +137,7 @@ jobs: os: windows-latest - target: aarch64-pc-windows-msvc os: windows-latest - # on linux we build with musl which causes trouble with open-ssl. For now, just build max-pure there - # even though we could also build with `--features max-control,http-client-reqwest,gitoxide-core-blocking-client,gix-features/fast-sha1` for fast hashing. + # on linux we build with musl which causes trouble with open-ssl. For now, just build max-pure there. # It's a TODO. exclude: - target: x86_64-unknown-linux-musl diff --git a/Cargo.lock b/Cargo.lock index 4bd5b1712ad..0649859d58b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1615,7 +1615,6 @@ dependencies = [ "document-features", "gix-chunk 0.4.11", "gix-date 0.9.3", - "gix-features 0.40.0", "gix-hash 0.16.0", "gix-testtools", "memmap2", @@ -1846,15 +1845,12 @@ dependencies = [ "crossbeam-channel", "document-features", "flate2", - "gix-hash 0.16.0", "gix-trace 0.1.12", "gix-utils 0.1.14", "libc", "once_cell", "parking_lot", "prodash 29.0.1", - "sha1", - "sha1_smol", "thiserror 2.0.12", "walkdir", ] @@ -1967,6 +1963,7 @@ dependencies = [ "gix-features 0.40.0", "gix-testtools", "serde", + "sha1-checked", "thiserror 2.0.12", ] @@ -4872,16 +4869,16 @@ dependencies = [ "cfg-if", "cpufeatures", "digest", - "sha1-asm", ] [[package]] -name = "sha1-asm" -version = "0.5.3" +name = "sha1-checked" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "286acebaf8b67c1130aedffad26f594eff0c1292389158135327d2e23aed582b" +checksum = "89f599ac0c323ebb1c6082821a54962b839832b03984598375bff3975b804423" dependencies = [ - "cc", + "digest", + "sha1", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 5b8963a9661..e367ddebcf0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ max = ["max-control", "fast", "gitoxide-core-tools-query", "gitoxide-core-tools- ## transports as it uses Rust's HTTP implementation. ## ## As fast as possible, with TUI progress, progress line rendering with auto-configuration, all transports available but less mature pure Rust HTTP implementation, all `ein` tools, CLI colors and local-time support, JSON output, regex support for rev-specs. -max-pure = ["max-control", "gix-features/rustsha1", "gix-features/zlib-rust-backend", "http-client-reqwest", "gitoxide-core-blocking-client"] +max-pure = ["max-control", "gix-features/zlib-rust-backend", "http-client-reqwest", "gitoxide-core-blocking-client"] ## Like `max`, but with more control for configuration. See the *Package Maintainers* headline for more information. max-control = ["tracing", "fast-safe", "pretty-cli", "gitoxide-core-tools", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/revparse-regex"] @@ -60,7 +60,7 @@ lean = ["fast", "tracing", "pretty-cli", "http-client-curl", "gitoxide-core-tool ## This build is essentially limited to local operations without any fanciness. ## ## Optimized for size, no parallelism thus much slower, progress line rendering. -small = ["pretty-cli", "gix-features/rustsha1", "gix-features/zlib-rust-backend", "prodash-render-line", "is-terminal"] +small = ["pretty-cli", "gix-features/zlib-rust-backend", "prodash-render-line", "is-terminal"] ## Like lean, but uses Rusts async implementations for networking. ## @@ -74,7 +74,7 @@ small = ["pretty-cli", "gix-features/rustsha1", "gix-features/zlib-rust-backend" lean-async = ["fast", "tracing", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-tools-query", "gitoxide-core-tools-corpus", "gitoxide-core-async-client", "prodash-render-line"] #! ### Package Maintainers -#! `*-control` features leave it to you to configure C libraries, involving choices for `zlib`, ! hashing and transport implementation. +#! `*-control` features leave it to you to configure C libraries, involving choices for `zlib` and transport implementation. #! #! Additional features *can* be provided with `--features` and are handled by the [`gix-features` crate](https://docs.rs/gix-features/latest). #! If nothing else is specified, the Rust implementation is used. ! Note that only one feature of each section can be enabled at a time. @@ -84,28 +84,25 @@ lean-async = ["fast", "tracing", "pretty-cli", "gitoxide-core-tools", "gitoxide- #! - `gix-features/zlib-ng-compat` #! - `gix-features/zlib-stock` #! - `gix-features/zlib-rust-backend` (*default if no choice is made*) -#! * **sha1** -#! - `gix-features/fast-sha1` -#! - `gix-features/rustsha1` (*default if no choice is made*) #! * **HTTP** - see the *Building Blocks for mutually exclusive networking* headline #! #! #### Examples #! #! * `cargo build --release --no-default-features --features max-control,gix-features/zlib-stock,gitoxide-core-blocking-client,http-client-curl` #! - Create a build just like `max`, but using the stock `zlib` library instead of `zlib-ng` -#! * `cargo build --release --no-default-features --features max-control,http-client-reqwest,gitoxide-core-blocking-client,gix-features/fast-sha1` -#! - Create a build just like `max-pure`, but with faster hashing due to `fast-sha1`. +#! * `cargo build --release --no-default-features --features max-control,http-client-reqwest,gitoxide-core-blocking-client,gix-features/zlib-ng` +#! - Create a build just like `max-pure`, but with faster compression due to `zlib-ng`. #! ### Building Blocks #! Typical combinations of features of our dependencies, some of which are referred to in the `gitoxide` crate's code for conditional compilation. ## Makes the crate execute as fast as possible by supporting parallel computation of otherwise long-running functions -## as well as fast, hardware accelerated hashing, along with a faster zlib backend. +## as well as a faster zlib backend. ## If disabled, the binary will be visibly smaller. fast = ["gix/max-performance", "gix/comfort"] ## Makes the crate execute as fast as possible by supporting parallel computation of otherwise long-running functions -## as well as fast, hardware accelerated hashing, along with a faster zlib backend. +## as well as a faster zlib backend. ## If disabled, the binary will be visibly smaller. fast-safe = ["gix/max-performance-safe", "gix/comfort"] @@ -205,8 +202,7 @@ gix-hash = { opt-level = 3 } gix-actor = { opt-level = 3 } gix-config = { opt-level = 3 } miniz_oxide = { opt-level = 3 } -sha1 = { opt-level = 3 } -sha1_smol = { opt-level = 3 } +sha1-checked = { opt-level = 3 } [profile.release] overflow-checks = false diff --git a/SHORTCOMINGS.md b/SHORTCOMINGS.md index f25e320698e..3bdf20aa43d 100644 --- a/SHORTCOMINGS.md +++ b/SHORTCOMINGS.md @@ -35,7 +35,3 @@ This file is for tracking features that are less well implemented or less powerf * **gix-url** _might_ be more restrictive than what git allows as for the most part, it uses a browser grade URL parser. * Thus far there is no proof for this, and as _potential remedy_ we could certainly re-implement exactly what git does to handle its URLs. - -### `gix-features` - -* **sha1** isn't hardened (i.e. doesn't have collision detection). Needs [to be contributed](https://github.com/GitoxideLabs/gitoxide/issues/585). diff --git a/crate-status.md b/crate-status.md index 3274407dd48..f5081e45826 100644 --- a/crate-status.md +++ b/crate-status.md @@ -894,8 +894,6 @@ See its [README.md](https://github.com/GitoxideLabs/gitoxide/blob/main/gix-lock/ * `in_parallel` * `join` * _When off all functions execute serially_ -* **fast-sha1** - * provides a faster SHA1 implementation using CPU intrinsics * [x] API documentation ### gix-tui diff --git a/gitoxide-core/src/pack/explode.rs b/gitoxide-core/src/pack/explode.rs index 0ae360d95b5..30581b97cc4 100644 --- a/gitoxide-core/src/pack/explode.rs +++ b/gitoxide-core/src/pack/explode.rs @@ -78,11 +78,11 @@ enum Error { }, #[error("Object didn't verify after right after writing it")] Verify(#[from] objs::data::verify::Error), - #[error("{kind} object {expected} wasn't re-encoded without change - new hash is {actual}")] + #[error("{kind} object wasn't re-encoded without change")] ObjectEncodeMismatch { + #[source] + source: gix::hash::verify::Error, kind: object::Kind, - actual: ObjectId, - expected: ObjectId, }, #[error("The recently written file for loose object {id} could not be found")] WrittenFileMissing { id: ObjectId }, @@ -201,7 +201,7 @@ pub fn pack_or_pack_index( kind: object_kind, id: index_entry.oid, })?; - if written_id != index_entry.oid { + if let Err(err) = written_id.verify(&index_entry.oid) { if let object::Kind::Tree = object_kind { progress.info(format!( "The tree in pack named {} was written as {} due to modes 100664 and 100640 rewritten as 100644.", @@ -209,9 +209,8 @@ pub fn pack_or_pack_index( )); } else { return Err(Error::ObjectEncodeMismatch { + source: err, kind: object_kind, - actual: index_entry.oid, - expected: written_id, }); } } diff --git a/gix-commitgraph/Cargo.toml b/gix-commitgraph/Cargo.toml index ec5ca164c96..585dbf1b415 100644 --- a/gix-commitgraph/Cargo.toml +++ b/gix-commitgraph/Cargo.toml @@ -20,7 +20,6 @@ doctest = false serde = ["dep:serde", "gix-hash/serde", "bstr/serde"] [dependencies] -gix-features = { version = "^0.40.0", path = "../gix-features", features = ["rustsha1"] } gix-hash = { version = "^0.16.0", path = "../gix-hash" } gix-chunk = { version = "^0.4.11", path = "../gix-chunk" } diff --git a/gix-commitgraph/src/file/verify.rs b/gix-commitgraph/src/file/verify.rs index 5bff7309504..cbcde73f743 100644 --- a/gix-commitgraph/src/file/verify.rs +++ b/gix-commitgraph/src/file/verify.rs @@ -28,11 +28,8 @@ pub enum Error { Filename(String), #[error("commit {id} has invalid generation {generation}")] Generation { generation: u32, id: gix_hash::ObjectId }, - #[error("checksum mismatch: expected {expected}, got {actual}")] - Mismatch { - actual: gix_hash::ObjectId, - expected: gix_hash::ObjectId, - }, + #[error(transparent)] + Checksum(#[from] checksum::Error), #[error("{0}")] Processor(#[source] E), #[error("commit {id} has invalid root tree ID {root_tree_id}")] @@ -42,6 +39,19 @@ pub enum Error { }, } +/// +pub mod checksum { + /// The error used in [`super::File::verify_checksum()`]. + #[derive(thiserror::Error, Debug)] + #[allow(missing_docs)] + pub enum Error { + #[error("failed to hash commit graph file")] + Hasher(#[from] gix_hash::hasher::Error), + #[error(transparent)] + Verify(#[from] gix_hash::verify::Error), + } +} + /// The positive result of [`File::traverse()`] providing some statistical information. #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] @@ -73,8 +83,7 @@ impl File { E: std::error::Error + 'static, Processor: FnMut(&file::Commit<'a>) -> Result<(), E>, { - self.verify_checksum() - .map_err(|(actual, expected)| Error::Mismatch { actual, expected })?; + self.verify_checksum()?; verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?; let null_id = self.object_hash().null_ref(); @@ -138,23 +147,18 @@ impl File { /// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file, excluding the trailing /// checksum itself. /// - /// Return the actual checksum on success or `(actual checksum, expected checksum)` if there is a mismatch. - pub fn verify_checksum(&self) -> Result { - // Even though we could use gix_features::hash::bytes_of_file(…), this would require using our own - // Error type to support io::Error and Mismatch. As we only gain progress, there probably isn't much value + /// Return the actual checksum on success or [`checksum::Error`] if there is a mismatch. + pub fn verify_checksum(&self) -> Result { + // Even though we could use gix_hash::bytes_of_file(…), this would require extending our + // Error type to support io::Error. As we only gain progress, there probably isn't much value // as these files are usually small enough to process them in less than a second, even for the large ones. // But it's possible, once a progress instance is passed. let data_len_without_trailer = self.data.len() - self.hash_len; - let mut hasher = gix_features::hash::hasher(self.object_hash()); + let mut hasher = gix_hash::hasher(self.object_hash()); hasher.update(&self.data[..data_len_without_trailer]); - let actual = gix_hash::ObjectId::from_bytes_or_panic(hasher.digest().as_ref()); - - let expected = self.checksum(); - if actual == expected { - Ok(actual) - } else { - Err((actual, expected.into())) - } + let actual = hasher.try_finalize()?; + actual.verify(self.checksum())?; + Ok(actual) } } diff --git a/gix-commitgraph/src/verify.rs b/gix-commitgraph/src/verify.rs index 9a37cff83ad..f1798514bb4 100644 --- a/gix-commitgraph/src/verify.rs +++ b/gix-commitgraph/src/verify.rs @@ -164,9 +164,7 @@ impl Graph { file::verify::Error::RootTreeId { id, root_tree_id } => { file::verify::Error::RootTreeId { id, root_tree_id } } - file::verify::Error::Mismatch { actual, expected } => { - file::verify::Error::Mismatch { actual, expected } - } + file::verify::Error::Checksum(err) => file::verify::Error::Checksum(err), file::verify::Error::Generation { generation, id } => { file::verify::Error::Generation { generation, id } } diff --git a/gix-diff/tests/diff/blob/pipeline.rs b/gix-diff/tests/diff/blob/pipeline.rs index 664e080bf3d..d042e8fda51 100644 --- a/gix-diff/tests/diff/blob/pipeline.rs +++ b/gix-diff/tests/diff/blob/pipeline.rs @@ -72,7 +72,7 @@ pub(crate) mod convert_to_diffable { let mut db = ObjectDb::default(); let b_content = "b-content"; - let id = db.insert(b_content); + let id = db.insert(b_content)?; let out = filter.convert_to_diffable( &id, @@ -129,7 +129,7 @@ pub(crate) mod convert_to_diffable { assert_eq!(buf.len(), 0, "it should avoid querying that data in the first place"); let mut db = ObjectDb::default(); - let id = db.insert(large_content); + let id = db.insert(large_content)?; let out = filter.convert_to_diffable( &id, EntryKind::Blob, @@ -211,7 +211,7 @@ pub(crate) mod convert_to_diffable { drop(tmp); let mut db = ObjectDb::default(); - let id = db.insert(large_content); + let id = db.insert(large_content)?; let out = filter.convert_to_diffable( &id, @@ -397,7 +397,7 @@ pub(crate) mod convert_to_diffable { let mut db = ObjectDb::default(); let b_content = "b-content\n"; - let id = db.insert(b_content); + let id = db.insert(b_content)?; let out = filter.convert_to_diffable( &id, @@ -416,7 +416,7 @@ pub(crate) mod convert_to_diffable { let mut db = ObjectDb::default(); let b_content = "b\n"; - let id = db.insert(b_content); + let id = db.insert(b_content)?; let out = filter.convert_to_diffable( &id, EntryKind::Blob, @@ -490,7 +490,7 @@ pub(crate) mod convert_to_diffable { let mut db = ObjectDb::default(); let b_content = "b-co\0ntent\n"; - let id = db.insert(b_content); + let id = db.insert(b_content)?; let out = filter.convert_to_diffable( &id, @@ -509,7 +509,7 @@ pub(crate) mod convert_to_diffable { let platform = attributes.at_entry("c", None, &gix_object::find::Never)?; - let id = db.insert("b"); + let id = db.insert("b")?; let out = filter.convert_to_diffable( &id, EntryKind::Blob, @@ -638,7 +638,7 @@ pub(crate) mod convert_to_diffable { assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false })); assert_eq!(buf.as_bstr(), "a\n", "unconditionally use git according to mode"); - let id = db.insert("a\n"); + let id = db.insert("a\n")?; for mode in worktree_modes { let out = filter.convert_to_diffable( &id, @@ -714,7 +714,7 @@ pub(crate) mod convert_to_diffable { assert_eq!(buf.len(), 0, "always cleared"); buf.push(1); - let id = db.insert("link-target"); + let id = db.insert("link-target")?; let out = filter.convert_to_diffable( &id, EntryKind::Link, @@ -761,7 +761,7 @@ pub(crate) mod convert_to_diffable { assert_eq!(buf.len(), 0, "it's always cleared before any potential use"); } - let id = db.insert("b\n"); + let id = db.insert("b\n")?; for mode in all_modes { buf.push(1); let out = filter.convert_to_diffable( @@ -814,7 +814,7 @@ pub(crate) mod convert_to_diffable { ); } - let id = db.insert("c\n"); + let id = db.insert("c\n")?; for mode in worktree_modes { let out = filter.convert_to_diffable( &id, @@ -863,7 +863,7 @@ pub(crate) mod convert_to_diffable { assert_eq!(buf.len(), 0); } - let id = db.insert("unset\n"); + let id = db.insert("unset\n")?; for mode in all_modes { let out = filter.convert_to_diffable( &id, @@ -890,7 +890,7 @@ pub(crate) mod convert_to_diffable { } let platform = attributes.at_entry("d", None, &gix_object::find::Never)?; - let id = db.insert("d-in-db"); + let id = db.insert("d-in-db")?; for mode in worktree_modes { let out = filter.convert_to_diffable( &null, @@ -959,7 +959,7 @@ pub(crate) mod convert_to_diffable { "no text filter, so git conversion was applied for worktree source" ); - let id = db.insert("e-in-db"); + let id = db.insert("e-in-db")?; let out = filter.convert_to_diffable( &id, EntryKind::Blob, diff --git a/gix-diff/tests/diff/blob/platform.rs b/gix-diff/tests/diff/blob/platform.rs index 6ab3a3dbc68..6976d805270 100644 --- a/gix-diff/tests/diff/blob/platform.rs +++ b/gix-diff/tests/diff/blob/platform.rs @@ -30,7 +30,7 @@ fn resources_of_worktree_and_odb_and_check_link() -> crate::Result { let mut db = ObjectDb::default(); let a_content = "a-content"; - let id = db.insert(a_content); + let id = db.insert(a_content)?; platform.set_resource( id, EntryKind::BlobExecutable, @@ -194,7 +194,7 @@ fn diff_binary() -> crate::Result { let mut db = ObjectDb::default(); let a_content = "b"; - let id = db.insert(a_content); + let id = db.insert(a_content)?; platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::NewOrDestination, &db)?; let out = platform.prepare_diff()?; @@ -235,7 +235,7 @@ fn diff_performed_despite_external_command() -> crate::Result { let mut db = ObjectDb::default(); let a_content = "b"; - let id = db.insert(a_content); + let id = db.insert(a_content)?; platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::NewOrDestination, &db)?; let out = platform.prepare_diff()?; @@ -277,7 +277,7 @@ fn diff_skipped_due_to_external_command_and_enabled_option() -> crate::Result { let mut db = ObjectDb::default(); let a_content = "b"; - let id = db.insert(a_content); + let id = db.insert(a_content)?; platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::NewOrDestination, &db)?; let out = platform.prepare_diff()?; diff --git a/gix-diff/tests/diff/main.rs b/gix-diff/tests/diff/main.rs index a6be530ffdb..667f1daf8d3 100644 --- a/gix-diff/tests/diff/main.rs +++ b/gix-diff/tests/diff/main.rs @@ -51,10 +51,10 @@ mod util { impl ObjectDb { /// Insert `data` and return its hash. That can be used to find it again. - pub fn insert(&mut self, data: &str) -> gix_hash::ObjectId { - let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes()); + pub fn insert(&mut self, data: &str) -> Result { + let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes())?; self.data_by_id.insert(id, data.into()); - id + Ok(id) } } } diff --git a/gix-diff/tests/diff/rewrites/tracker.rs b/gix-diff/tests/diff/rewrites/tracker.rs index e010a9c81f8..9469494e89a 100644 --- a/gix-diff/tests/diff/rewrites/tracker.rs +++ b/gix-diff/tests/diff/rewrites/tracker.rs @@ -92,7 +92,7 @@ fn copy_by_similarity_reports_limit_if_encountered() -> crate::Result { (Change::addition(), "a-cpy-2", "a"), (Change::modification(), "d", "ab"), ], - ); + )?; let mut calls = 0; let out = util::assert_emit_with_objects( @@ -145,7 +145,7 @@ fn copy_by_id() -> crate::Result { (Change::addition(), "a-cpy-2", "a"), (Change::modification(), "d", "a"), ], - ); + )?; let mut calls = 0; let out = util::assert_emit_with_objects( @@ -218,7 +218,7 @@ fn copy_by_id_search_in_all_sources() -> crate::Result { (Change::addition(), "a-cpy-1", "a"), (Change::addition(), "a-cpy-2", "a"), ], - ); + )?; let mut calls = 0; let content_id = hex_to_id("2e65efe2a145dda7ee51d1741299f848e5bf752e"); @@ -299,7 +299,7 @@ fn copy_by_50_percent_similarity() -> crate::Result { (Change::addition(), "a-cpy-2", "a\nc"), (Change::modification(), "d", "a"), ], - ); + )?; let mut calls = 0; let out = util::assert_emit_with_objects( @@ -377,7 +377,7 @@ fn copy_by_id_in_additions_only() -> crate::Result { (Change::modification(), "a", "a"), (Change::modification(), "a-cpy-1", "a"), ], - ); + )?; let mut calls = 0; let out = util::assert_emit_with_objects( @@ -429,7 +429,7 @@ fn rename_by_similarity_reports_limit_if_encountered() -> crate::Result { (Change::addition(), "b", "firt\nsecond\n"), (Change::addition(), "c", "second\nunrelated\n"), ], - ); + )?; let mut calls = 0; let out = util::assert_emit_with_objects( @@ -475,7 +475,7 @@ fn rename_by_50_percent_similarity() -> crate::Result { (Change::addition(), "b", "firt\nsecond\n"), (Change::addition(), "c", "second\nunrelated\n"), ], - ); + )?; let mut calls = 0; let out = util::assert_emit_with_objects( @@ -596,7 +596,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result { (Change::deletion(), "a", "first\nsecond\n"), (Change::addition(), "b", "firt\nsecond\n"), ], - ); + )?; let mut calls = 0; let out = util::assert_emit_with_objects( @@ -803,16 +803,16 @@ mod util { pub fn add_retained_blobs<'a>( tracker: &mut rewrites::Tracker, blobs: impl IntoIterator, - ) -> ObjectDb { + ) -> crate::Result { let mut db = ObjectDb::default(); for (mut change, location, data) in blobs { - change.id = db.insert(data); + change.id = db.insert(data)?; assert!( tracker.try_push_change(change, location.into()).is_none(), "input changes must be tracked" ); } - db + Ok(db) } pub fn assert_emit( diff --git a/gix-features/Cargo.toml b/gix-features/Cargo.toml index 67983ee4f34..ae089a62f9b 100644 --- a/gix-features/Cargo.toml +++ b/gix-features/Cargo.toml @@ -77,40 +77,25 @@ zlib-stock = ["zlib", "flate2?/zlib"] ## may build in environments where other backends don't. zlib-rust-backend = ["zlib", "flate2?/rust_backend"] -#! ### Mutually Exclusive SHA1 -## A fast SHA1 implementation is critical to `gitoxide's` object database performance -## A multi-crate implementation that can use hardware acceleration, thus bearing the potential for up to 2Gb/s throughput on -## CPUs that support it, like AMD Ryzen or Intel Core i3, as well as Apple Silicon like M1. -## Takes precedence over `rustsha1` if both are specified. -fast-sha1 = ["dep:sha1"] -## A standard and well performing pure Rust implementation of Sha1. Will significantly slow down various git operations. -rustsha1 = ["dep:sha1_smol"] - #! ### Other ## Count cache hits and misses and print that debug information on drop. ## Caches implement this by default, which costs nothing unless this feature is enabled cache-efficiency-debug = [] -[[test]] -name = "hash" -path = "tests/hash.rs" -required-features = ["rustsha1"] - [[test]] name = "parallel" path = "tests/parallel_threaded.rs" -required-features = ["parallel", "rustsha1"] +required-features = ["parallel"] [[test]] name = "multi-threaded" path = "tests/parallel_shared_threaded.rs" -required-features = ["parallel", "rustsha1"] +required-features = ["parallel"] [[test]] name = "single-threaded" path = "tests/parallel_shared.rs" -required-features = ["rustsha1"] [[test]] name = "pipe" @@ -118,7 +103,6 @@ path = "tests/pipe.rs" required-features = ["io-pipe"] [dependencies] -gix-hash = { version = "^0.16.0", path = "../gix-hash" } gix-trace = { version = "^0.1.12", path = "../gix-trace" } # for walkdir @@ -130,10 +114,8 @@ parking_lot = { version = "0.12.0", default-features = false, optional = true } walkdir = { version = "2.3.2", optional = true } # used when parallel is off -# hashing and 'fast-sha1' feature -sha1_smol = { version = "1.0.0", optional = true } +# hashing crc32fast = { version = "1.2.1", optional = true } -sha1 = { version = "0.10.0", optional = true } # progress prodash = { version = "29.0.1", optional = true } @@ -156,12 +138,6 @@ libc = { version = "0.2.119" } [dev-dependencies] bstr = { version = "1.3.0", default-features = false } - -# Assembly doesn't yet compile on MSVC on windows, but does on GNU, see https://github.com/RustCrypto/asm-hashes/issues/17 -# At this time, only aarch64, x86 and x86_64 are supported. -[target.'cfg(all(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"), not(target_os = "windows")))'.dependencies] -sha1 = { version = "0.10.0", optional = true, features = ["asm"] } - [package.metadata.docs.rs] all-features = true features = ["document-features"] diff --git a/gix-features/src/hash.rs b/gix-features/src/hash.rs index eebc40fa075..27db01384fe 100644 --- a/gix-features/src/hash.rs +++ b/gix-features/src/hash.rs @@ -1,60 +1,10 @@ //! Hash functions and hash utilities -//! -//! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support -//! in case it is available. Otherwise the `rustsha1` feature should be set. `fast-sha1` will take precedence. -//! Otherwise, a minimal yet performant implementation is used instead for a decent trade-off between compile times and run-time performance. -#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))] -mod _impl { - use super::Digest; - - /// A implementation of the Sha1 hash, which can be used once. - #[derive(Default, Clone)] - pub struct Sha1(sha1_smol::Sha1); - - impl Sha1 { - /// Digest the given `bytes`. - pub fn update(&mut self, bytes: &[u8]) { - self.0.update(bytes); - } - /// Finalize the hash and produce a digest. - pub fn digest(self) -> Digest { - self.0.digest().bytes() - } - } -} - -/// A hash-digest produced by a [`Hasher`] hash implementation. -#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))] -pub type Digest = [u8; 20]; - -#[cfg(feature = "fast-sha1")] -mod _impl { - use sha1::Digest; - - /// A implementation of the Sha1 hash, which can be used once. - #[derive(Default, Clone)] - pub struct Sha1(sha1::Sha1); - - impl Sha1 { - /// Digest the given `bytes`. - pub fn update(&mut self, bytes: &[u8]) { - self.0.update(bytes); - } - /// Finalize the hash and produce a digest. - pub fn digest(self) -> super::Digest { - self.0.finalize().into() - } - } -} - -#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] -pub use _impl::Sha1 as Hasher; /// Compute a CRC32 hash from the given `bytes`, returning the CRC32 hash. /// -/// When calling this function for the first time, `previous_value` should be `0`. Otherwise it -/// should be the previous return value of this function to provide a hash of multiple sequential -/// chunks of `bytes`. +/// When calling this function for the first time, `previous_value` should be `0`. +/// Otherwise, it should be the previous return value of this function to provide a hash +/// of multiple sequential chunks of `bytes`. #[cfg(feature = "crc32")] pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 { let mut h = crc32fast::Hasher::new_with_initial(previous_value); @@ -71,132 +21,3 @@ pub fn crc32(bytes: &[u8]) -> u32 { h.update(bytes); h.finalize() } - -/// Produce a hasher suitable for the given kind of hash. -#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] -pub fn hasher(kind: gix_hash::Kind) -> Hasher { - match kind { - gix_hash::Kind::Sha1 => Hasher::default(), - } -} - -/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start` -/// while initializing and calling `progress`. -/// -/// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself, -/// denoting the amount of bytes to hash starting from the beginning of the file. -/// -/// # Note -/// -/// * Only available with the `gix-object` feature enabled due to usage of the [`gix_hash::Kind`] enum and the -/// [`gix_hash::ObjectId`] return value. -/// * [Interrupts][crate::interrupt] are supported. -#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] -pub fn bytes_of_file( - path: &std::path::Path, - num_bytes_from_start: u64, - kind: gix_hash::Kind, - progress: &mut dyn crate::progress::Progress, - should_interrupt: &std::sync::atomic::AtomicBool, -) -> std::io::Result { - bytes( - &mut std::fs::File::open(path)?, - num_bytes_from_start, - kind, - progress, - should_interrupt, - ) -} - -/// Similar to [`bytes_of_file`], but operates on a stream of bytes. -#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] -pub fn bytes( - read: &mut dyn std::io::Read, - num_bytes_from_start: u64, - kind: gix_hash::Kind, - progress: &mut dyn crate::progress::Progress, - should_interrupt: &std::sync::atomic::AtomicBool, -) -> std::io::Result { - bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt) -} - -/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind. -#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] -pub fn bytes_with_hasher( - read: &mut dyn std::io::Read, - num_bytes_from_start: u64, - mut hasher: Hasher, - progress: &mut dyn crate::progress::Progress, - should_interrupt: &std::sync::atomic::AtomicBool, -) -> std::io::Result { - let start = std::time::Instant::now(); - // init progress before the possibility for failure, as convenience in case people want to recover - progress.init( - Some(num_bytes_from_start as prodash::progress::Step), - crate::progress::bytes(), - ); - - const BUF_SIZE: usize = u16::MAX as usize; - let mut buf = [0u8; BUF_SIZE]; - let mut bytes_left = num_bytes_from_start; - - while bytes_left > 0 { - let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)]; - read.read_exact(out)?; - bytes_left -= out.len() as u64; - progress.inc_by(out.len()); - hasher.update(out); - if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) { - return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted")); - } - } - - let id = gix_hash::ObjectId::from(hasher.digest()); - progress.show_throughput(start); - Ok(id) -} - -#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] -mod write { - use crate::hash::Hasher; - - /// A utility to automatically generate a hash while writing into an inner writer. - pub struct Write { - /// The hash implementation. - pub hash: Hasher, - /// The inner writer. - pub inner: T, - } - - impl std::io::Write for Write - where - T: std::io::Write, - { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - let written = self.inner.write(buf)?; - self.hash.update(&buf[..written]); - Ok(written) - } - - fn flush(&mut self) -> std::io::Result<()> { - self.inner.flush() - } - } - - impl Write - where - T: std::io::Write, - { - /// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`. - pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self { - match object_hash { - gix_hash::Kind::Sha1 => Write { - inner, - hash: Hasher::default(), - }, - } - } - } -} -#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] -pub use write::Write; diff --git a/gix-features/tests/hash.rs b/gix-features/tests/hash.rs deleted file mode 100644 index 7e7f6654dbd..00000000000 --- a/gix-features/tests/hash.rs +++ /dev/null @@ -1,16 +0,0 @@ -use gix_features::hash::Hasher; - -#[cfg(not(feature = "fast-sha1"))] -#[test] -fn size_of_sha1() { - assert_eq!(std::mem::size_of::(), 96); -} - -#[cfg(feature = "fast-sha1")] -#[test] -fn size_of_sha1() { - assert_eq!( - std::mem::size_of::(), - if cfg!(target_arch = "x86") { 96 } else { 104 } - ); -} diff --git a/gix-filter/src/eol/convert_to_worktree.rs b/gix-filter/src/eol/convert_to_worktree.rs index bc3bc474813..a7b8efe828b 100644 --- a/gix-filter/src/eol/convert_to_worktree.rs +++ b/gix-filter/src/eol/convert_to_worktree.rs @@ -5,6 +5,14 @@ use crate::{ eol::{AttributesDigest, Configuration, Mode, Stats}, }; +/// The error produced by [`convert_to_worktree()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not allocate buffer")] + OutOfMemory(#[from] std::collections::TryReserveError), +} + /// Convert all `\n` in `src` to `crlf` if `digest` and `config` indicate it, returning `true` if `buf` holds the result, or `false` /// if no change was made after all. pub fn convert_to_worktree( @@ -12,7 +20,7 @@ pub fn convert_to_worktree( digest: AttributesDigest, buf: &mut Vec, config: Configuration, -) -> Result { +) -> Result { if src.is_empty() || digest.to_eol(config) != Some(Mode::CrLf) { return Ok(false); } diff --git a/gix-filter/src/eol/mod.rs b/gix-filter/src/eol/mod.rs index ad1553826e0..a338e786630 100644 --- a/gix-filter/src/eol/mod.rs +++ b/gix-filter/src/eol/mod.rs @@ -2,7 +2,8 @@ pub mod convert_to_git; pub use convert_to_git::function::convert_to_git; -mod convert_to_worktree; +/// +pub mod convert_to_worktree; pub use convert_to_worktree::convert_to_worktree; mod utils; diff --git a/gix-filter/src/ident.rs b/gix-filter/src/ident.rs index a5c2e0bdae5..3a5ee814db2 100644 --- a/gix-filter/src/ident.rs +++ b/gix-filter/src/ident.rs @@ -40,6 +40,19 @@ pub fn undo(src: &[u8], buf: &mut Vec) -> Result$` if present in `src` and write all changes to `buf`, /// with `object_hash` being used accordingly. Return `true` if `buf` was written to or `false` if no change was made /// (as there was nothing to do). @@ -48,18 +61,14 @@ pub fn undo(src: &[u8], buf: &mut Vec) -> Result$`, but we don't do that, sticking exactly to what ought to be done. /// The respective code is up to 16 years old and one might assume that `git` by now handles checking and checkout filters correctly. -pub fn apply( - src: &[u8], - object_hash: gix_hash::Kind, - buf: &mut Vec, -) -> Result { +pub fn apply(src: &[u8], object_hash: gix_hash::Kind, buf: &mut Vec) -> Result { const HASH_LEN: usize = ": ".len() + gix_hash::Kind::longest().len_in_hex(); let mut id = None; let mut ofs = 0; while let Some(pos) = src[ofs..].find(b"$Id$") { let id = match id { None => { - let new_id = gix_object::compute_hash(object_hash, gix_object::Kind::Blob, src); + let new_id = gix_object::compute_hash(object_hash, gix_object::Kind::Blob, src)?; id = new_id.into(); clear_and_set_capacity(buf, src.len() + HASH_LEN)?; // pre-allocate for one ID new_id diff --git a/gix-filter/src/pipeline/convert.rs b/gix-filter/src/pipeline/convert.rs index 4253e91ec5c..a236fc26e40 100644 --- a/gix-filter/src/pipeline/convert.rs +++ b/gix-filter/src/pipeline/convert.rs @@ -49,14 +49,16 @@ pub mod to_worktree { #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { + #[error(transparent)] + Ident(#[from] crate::ident::apply::Error), + #[error(transparent)] + Eol(#[from] crate::eol::convert_to_worktree::Error), #[error(transparent)] Worktree(#[from] crate::worktree::encode_to_worktree::Error), #[error(transparent)] Driver(#[from] crate::driver::apply::Error), #[error(transparent)] Configuration(#[from] super::configuration::Error), - #[error("Could not allocate buffer")] - OutOfMemory(#[from] std::collections::TryReserveError), } } diff --git a/gix-hash/Cargo.toml b/gix-hash/Cargo.toml index b037e9fa7c9..94a1cd5ad59 100644 --- a/gix-hash/Cargo.toml +++ b/gix-hash/Cargo.toml @@ -20,15 +20,17 @@ test = false serde = ["dep:serde"] [dependencies] +gix-features = { version = "^0.40.0", path = "../gix-features", features = ["progress"] } + thiserror = "2.0.0" faster-hex = { version = "0.9.0" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } +sha1-checked = { version = "0.10.0", default-features = false } document-features = { version = "0.2.0", optional = true } [dev-dependencies] gix-testtools = { path = "../tests/tools" } -gix-features = { path = "../gix-features", features = ["rustsha1"] } [package.metadata.docs.rs] all-features = true diff --git a/gix-hash/src/hasher.rs b/gix-hash/src/hasher.rs new file mode 100644 index 00000000000..2a03c3d219a --- /dev/null +++ b/gix-hash/src/hasher.rs @@ -0,0 +1,71 @@ +/// The error returned by [`Hasher::try_finalize()`](crate::Hasher::try_finalize()). +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Detected SHA-1 collision attack with digest {digest}")] + CollisionAttack { digest: crate::ObjectId }, +} + +pub(super) mod _impl { + use crate::hasher::Error; + use sha1_checked::{CollisionResult, Digest}; + + /// An implementation of the Sha1 hash, which can be used once. + /// + /// We use [`sha1_checked`] to implement the same collision detection + /// algorithm as Git. + #[derive(Clone)] + pub struct Hasher(sha1_checked::Sha1); + + impl Hasher { + /// Let's not provide a public default implementation to force people to go through [`hasher()`]. + fn default() -> Self { + // This matches the configuration used by Git, which only uses + // the collision detection to bail out, rather than computing + // alternate “safe hashes” for inputs where a collision attack + // was detected. + Self(sha1_checked::Builder::default().safe_hash(false).build()) + } + } + + impl Hasher { + /// Digest the given `bytes`. + pub fn update(&mut self, bytes: &[u8]) { + self.0.update(bytes); + } + + /// Finalize the hash and produce an object ID. + /// + /// Returns [`Error`] if a collision attack is detected. + #[inline] + pub fn try_finalize(self) -> Result { + match self.0.try_finalize() { + CollisionResult::Ok(digest) => Ok(crate::ObjectId::Sha1(digest.into())), + CollisionResult::Mitigated(_) => { + // SAFETY: `CollisionResult::Mitigated` is only + // returned when `safe_hash()` is on. `Hasher`’s field + // is private, and we only construct it in the + // `Default` instance, which turns `safe_hash()` off. + // + // As of Rust 1.84.1, the compiler can’t figure out + // this function cannot panic without this. + #[allow(unsafe_code)] + unsafe { + std::hint::unreachable_unchecked() + } + } + CollisionResult::Collision(digest) => Err(Error::CollisionAttack { + digest: crate::ObjectId::Sha1(digest.into()), + }), + } + } + } + + /// Produce a hasher suitable for the given `kind` of hash. + #[inline] + pub fn hasher(kind: crate::Kind) -> Hasher { + match kind { + crate::Kind::Sha1 => Hasher::default(), + } + } +} diff --git a/gix-hash/src/io.rs b/gix-hash/src/io.rs new file mode 100644 index 00000000000..f764d7c6b81 --- /dev/null +++ b/gix-hash/src/io.rs @@ -0,0 +1,126 @@ +use crate::hasher; + +/// The error type for I/O operations that compute hashes. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + Io(#[from] std::io::Error), + #[error("Failed to hash data")] + Hasher(#[from] hasher::Error), +} + +pub(super) mod _impl { + use crate::io::Error; + use crate::{hasher, Hasher}; + + /// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start` + /// while initializing and calling `progress`. + /// + /// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself, + /// denoting the amount of bytes to hash starting from the beginning of the file. + /// + /// # Note + /// + /// * [Interrupts][gix_features::interrupt] are supported. + pub fn bytes_of_file( + path: &std::path::Path, + num_bytes_from_start: u64, + kind: crate::Kind, + progress: &mut dyn gix_features::progress::Progress, + should_interrupt: &std::sync::atomic::AtomicBool, + ) -> Result { + bytes( + &mut std::fs::File::open(path)?, + num_bytes_from_start, + kind, + progress, + should_interrupt, + ) + } + + /// Similar to [`bytes_of_file`], but operates on a stream of bytes. + pub fn bytes( + read: &mut dyn std::io::Read, + num_bytes_from_start: u64, + kind: crate::Kind, + progress: &mut dyn gix_features::progress::Progress, + should_interrupt: &std::sync::atomic::AtomicBool, + ) -> Result { + bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt) + } + + /// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind. + pub fn bytes_with_hasher( + read: &mut dyn std::io::Read, + num_bytes_from_start: u64, + mut hasher: Hasher, + progress: &mut dyn gix_features::progress::Progress, + should_interrupt: &std::sync::atomic::AtomicBool, + ) -> Result { + let start = std::time::Instant::now(); + // init progress before the possibility for failure, as convenience in case people want to recover + progress.init( + Some(num_bytes_from_start as gix_features::progress::prodash::progress::Step), + gix_features::progress::bytes(), + ); + + const BUF_SIZE: usize = u16::MAX as usize; + let mut buf = [0u8; BUF_SIZE]; + let mut bytes_left = num_bytes_from_start; + + while bytes_left > 0 { + let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)]; + read.read_exact(out)?; + bytes_left -= out.len() as u64; + progress.inc_by(out.len()); + hasher.update(out); + if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) { + return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted").into()); + } + } + + let id = hasher.try_finalize()?; + progress.show_throughput(start); + Ok(id) + } + + /// A utility to automatically generate a hash while writing into an inner writer. + pub struct Write { + /// The hash implementation. + pub hash: Hasher, + /// The inner writer. + pub inner: T, + } + + impl std::io::Write for Write + where + T: std::io::Write, + { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + let written = self.inner.write(buf)?; + self.hash.update(&buf[..written]); + Ok(written) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.inner.flush() + } + } + + impl Write + where + T: std::io::Write, + { + /// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`. + pub fn new(inner: T, object_hash: crate::Kind) -> Self { + match object_hash { + crate::Kind::Sha1 => Write { + inner, + hash: crate::hasher(object_hash), + }, + } + } + } +} +pub use _impl::Write; diff --git a/gix-hash/src/lib.rs b/gix-hash/src/lib.rs index 20cb9c4327e..785fbe28c1a 100644 --- a/gix-hash/src/lib.rs +++ b/gix-hash/src/lib.rs @@ -13,12 +13,23 @@ mod borrowed; pub use borrowed::{oid, Error}; +/// Hash functions and hash utilities +pub mod hasher; +pub use hasher::_impl::{hasher, Hasher}; + +/// Error types for utility hash functions +pub mod io; +pub use io::_impl::{bytes, bytes_of_file, bytes_with_hasher}; + mod object_id; pub use object_id::{decode, ObjectId}; /// pub mod prefix; +/// +pub mod verify; + /// A partial, owned hash possibly identifying an object uniquely, whose non-prefix bytes are zeroed. /// /// An example would `0000000000000000000000000000000032bd3242`, where `32bd3242` is the prefix, diff --git a/gix-hash/src/verify.rs b/gix-hash/src/verify.rs new file mode 100644 index 00000000000..7fba80549b0 --- /dev/null +++ b/gix-hash/src/verify.rs @@ -0,0 +1,27 @@ +use crate::{oid, ObjectId}; + +/// The error returned by [`oid::verify()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +#[error("Hash was {actual}, but should have been {expected}")] +pub struct Error { + pub actual: ObjectId, + pub expected: ObjectId, +} + +impl oid { + /// Verify that `self` matches the `expected` object ID. + /// + /// Returns an [`Error`] containing both object IDs if they differ. + #[inline] + pub fn verify(&self, expected: &oid) -> Result<(), Error> { + if self == expected { + Ok(()) + } else { + Err(Error { + actual: self.to_owned(), + expected: expected.to_owned(), + }) + } + } +} diff --git a/gix-hash/tests/fixtures/shambles/messageA b/gix-hash/tests/fixtures/shambles/messageA new file mode 100644 index 00000000000..5a7c30e9764 Binary files /dev/null and b/gix-hash/tests/fixtures/shambles/messageA differ diff --git a/gix-hash/tests/fixtures/shambles/messageB b/gix-hash/tests/fixtures/shambles/messageB new file mode 100644 index 00000000000..fe39178400a Binary files /dev/null and b/gix-hash/tests/fixtures/shambles/messageB differ diff --git a/gix-hash/tests/hash/hasher.rs b/gix-hash/tests/hash/hasher.rs new file mode 100644 index 00000000000..282b7c8666f --- /dev/null +++ b/gix-hash/tests/hash/hasher.rs @@ -0,0 +1,20 @@ +use gix_hash::{Hasher, ObjectId}; + +#[test] +fn size_of_hasher() { + assert_eq!( + std::mem::size_of::(), + if cfg!(target_arch = "x86") { 820 } else { 824 }, + "The size of this type may be relevant when hashing millions of objects,\ + and shouldn't change unnoticed. The DetectionState alone clocks in at 724 bytes." + ); +} + +#[test] +fn size_of_try_finalize_return_type() { + assert_eq!( + std::mem::size_of::>(), + 21, + "The size of the return value is just 1 byte larger than just returning the object hash itself" + ); +} diff --git a/gix-hash/tests/kind/mod.rs b/gix-hash/tests/hash/kind.rs similarity index 100% rename from gix-hash/tests/kind/mod.rs rename to gix-hash/tests/hash/kind.rs diff --git a/gix-hash/tests/hash.rs b/gix-hash/tests/hash/main.rs similarity index 93% rename from gix-hash/tests/hash.rs rename to gix-hash/tests/hash/main.rs index 9cdd947657e..766b53619b7 100644 --- a/gix-hash/tests/hash.rs +++ b/gix-hash/tests/hash/main.rs @@ -1,5 +1,6 @@ use gix_hash::ObjectId; +mod hasher; mod kind; mod object_id; mod oid; diff --git a/gix-hash/tests/hash/object_id.rs b/gix-hash/tests/hash/object_id.rs new file mode 100644 index 00000000000..a91aef4b2bc --- /dev/null +++ b/gix-hash/tests/hash/object_id.rs @@ -0,0 +1,122 @@ +mod from_hex { + + mod valid { + use gix_hash::ObjectId; + + #[test] + fn twenty_hex_chars_lowercase() { + assert!(ObjectId::from_hex(b"1234567890abcdefaaaaaaaaaaaaaaaaaaaaaaaa").is_ok()); + } + + #[test] + fn twenty_hex_chars_uppercase() { + assert!(ObjectId::from_hex(b"1234567890ABCDEFAAAAAAAAAAAAAAAAAAAAAAAA").is_ok()); + } + } + + mod invalid { + use gix_hash::{decode, ObjectId}; + + #[test] + fn non_hex_characters() { + assert!(matches!( + ObjectId::from_hex(b"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz").unwrap_err(), + decode::Error::Invalid + )); + } + + #[test] + fn too_short() { + assert!(matches!( + ObjectId::from_hex(b"abcd").unwrap_err(), + decode::Error::InvalidHexEncodingLength(4) + )); + } + #[test] + fn too_long() { + assert!(matches!( + ObjectId::from_hex(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaf").unwrap_err(), + decode::Error::InvalidHexEncodingLength(41) + )); + } + } +} + +mod sha1 { + use std::str::FromStr as _; + + use gix_hash::{hasher, Kind, ObjectId}; + + fn hash_contents(s: &[u8]) -> Result { + let mut hasher = hasher(Kind::Sha1); + hasher.update(s); + hasher.try_finalize() + } + + #[test] + fn empty_blob() { + assert_eq!( + ObjectId::empty_blob(Kind::Sha1), + hash_contents(b"blob 0\0").expect("empty blob to not collide"), + ); + } + + #[test] + fn empty_tree() { + assert_eq!( + ObjectId::empty_tree(Kind::Sha1), + hash_contents(b"tree 0\0").expect("empty tree to not collide"), + ); + } + + /// Check the test vectors from RFC 3174. + #[test] + fn rfc_3174() { + let fixtures: &[(&[u8], &str)] = &[ + (b"abc", "A9 99 3E 36 47 06 81 6A BA 3E 25 71 78 50 C2 6C 9C D0 D8 9D"), + ( + b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + "84 98 3E 44 1C 3B D2 6E BA AE 4A A1 F9 51 29 E5 E5 46 70 F1", + ), + ( + &b"a".repeat(1000000), + "34 AA 97 3C D4 C4 DA A4 F6 1E EB 2B DB AD 27 31 65 34 01 6F", + ), + ( + &b"0123456701234567012345670123456701234567012345670123456701234567".repeat(10), + "DE A3 56 A2 CD DD 90 C7 A7 EC ED C5 EB B5 63 93 4F 46 04 52", + ), + ]; + for (input, output) in fixtures { + assert_eq!( + hash_contents(input).expect("RFC inputs to not collide"), + ObjectId::from_str(&output.to_lowercase().replace(' ', "")).expect("RFC digests to be valid"), + ); + } + } + + /// Check the “SHA‐1 is a Shambles” chosen‐prefix collision. + /// + /// See . + /// + /// We test these and not the earlier SHAttered PDFs because they are much smaller. + #[test] + fn shambles() { + let message_a = include_bytes!("../fixtures/shambles/messageA"); + let message_b = include_bytes!("../fixtures/shambles/messageB"); + assert_ne!(message_a, message_b); + + let expected = + ObjectId::from_str("8ac60ba76f1999a1ab70223f225aefdc78d4ddc0").expect("Shambles digest to be valid"); + + let Err(hasher::Error::CollisionAttack { digest }) = hash_contents(message_a) else { + panic!("expected Shambles input to collide"); + }; + assert_eq!(digest, expected); + + let Err(hasher::Error::CollisionAttack { digest }) = hash_contents(message_b) else { + panic!("expected Shambles input to collide"); + }; + assert_eq!(digest, expected); + } +} diff --git a/gix-hash/tests/oid/mod.rs b/gix-hash/tests/hash/oid.rs similarity index 100% rename from gix-hash/tests/oid/mod.rs rename to gix-hash/tests/hash/oid.rs diff --git a/gix-hash/tests/prefix/mod.rs b/gix-hash/tests/hash/prefix.rs similarity index 100% rename from gix-hash/tests/prefix/mod.rs rename to gix-hash/tests/hash/prefix.rs diff --git a/gix-hash/tests/object_id/mod.rs b/gix-hash/tests/object_id/mod.rs deleted file mode 100644 index 2ccc53abe9e..00000000000 --- a/gix-hash/tests/object_id/mod.rs +++ /dev/null @@ -1,64 +0,0 @@ -mod from_hex { - - mod valid { - use gix_hash::ObjectId; - - #[test] - fn twenty_hex_chars_lowercase() { - assert!(ObjectId::from_hex(b"1234567890abcdefaaaaaaaaaaaaaaaaaaaaaaaa").is_ok()); - } - - #[test] - fn twenty_hex_chars_uppercase() { - assert!(ObjectId::from_hex(b"1234567890ABCDEFAAAAAAAAAAAAAAAAAAAAAAAA").is_ok()); - } - } - - mod invalid { - use gix_hash::{decode, ObjectId}; - - #[test] - fn non_hex_characters() { - assert!(matches!( - ObjectId::from_hex(b"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz").unwrap_err(), - decode::Error::Invalid - )); - } - - #[test] - fn too_short() { - assert!(matches!( - ObjectId::from_hex(b"abcd").unwrap_err(), - decode::Error::InvalidHexEncodingLength(4) - )); - } - #[test] - fn too_long() { - assert!(matches!( - ObjectId::from_hex(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaf").unwrap_err(), - decode::Error::InvalidHexEncodingLength(41) - )); - } - } -} - -mod empty { - use gix_features::hash::hasher; - use gix_hash::{Kind, ObjectId}; - - fn hash_contents(s: &[u8]) -> ObjectId { - let mut hasher = hasher(Kind::Sha1); - hasher.update(s); - ObjectId::Sha1(hasher.digest()) - } - - #[test] - fn blob() { - assert_eq!(ObjectId::empty_blob(Kind::Sha1), hash_contents(b"blob 0\0")); - } - - #[test] - fn tree() { - assert_eq!(ObjectId::empty_tree(Kind::Sha1), hash_contents(b"tree 0\0")); - } -} diff --git a/gix-index/Cargo.toml b/gix-index/Cargo.toml index 060e86de52b..0d6753592e0 100644 --- a/gix-index/Cargo.toml +++ b/gix-index/Cargo.toml @@ -23,7 +23,6 @@ serde = ["dep:serde", "smallvec/serde", "gix-hash/serde"] [dependencies] gix-features = { version = "^0.40.0", path = "../gix-features", features = [ - "rustsha1", "progress", ] } gix-hash = { version = "^0.16.0", path = "../gix-hash" } diff --git a/gix-index/src/decode/mod.rs b/gix-index/src/decode/mod.rs index aea189248bd..4872c7e23fc 100644 --- a/gix-index/src/decode/mod.rs +++ b/gix-index/src/decode/mod.rs @@ -16,17 +16,16 @@ mod error { pub enum Error { #[error(transparent)] Header(#[from] decode::header::Error), + #[error("Could not hash index data")] + Hasher(#[from] gix_hash::hasher::Error), #[error("Could not parse entry at index {index}")] Entry { index: u32 }, #[error("Mandatory extension wasn't implemented or malformed.")] Extension(#[from] extension::decode::Error), #[error("Index trailer should have been {expected} bytes long, but was {actual}")] UnexpectedTrailerLength { expected: usize, actual: usize }, - #[error("Shared index checksum was {actual_checksum} but should have been {expected_checksum}")] - ChecksumMismatch { - actual_checksum: gix_hash::ObjectId, - expected_checksum: gix_hash::ObjectId, - }, + #[error("Shared index checksum mismatch")] + Verify(#[from] gix_hash::verify::Error), } } pub use error::Error; @@ -67,7 +66,7 @@ impl State { ) -> Result<(Self, Option), Error> { let _span = gix_features::trace::detail!("gix_index::State::from_bytes()", options = ?_options); let (version, num_entries, post_header_data) = header::decode(data, object_hash)?; - let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash); + let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash)?; let mut num_threads = gix_features::parallel::num_threads(thread_limit); let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes( @@ -217,12 +216,7 @@ impl State { let checksum = gix_hash::ObjectId::from_bytes_or_panic(data); let checksum = (!checksum.is_null()).then_some(checksum); if let Some((expected_checksum, actual_checksum)) = expected_checksum.zip(checksum) { - if actual_checksum != expected_checksum { - return Err(Error::ChecksumMismatch { - actual_checksum, - expected_checksum, - }); - } + actual_checksum.verify(&expected_checksum)?; } let EntriesOutcome { entries, diff --git a/gix-index/src/extension/end_of_index_entry/decode.rs b/gix-index/src/extension/end_of_index_entry/decode.rs index 4acc0be84bb..a9666bd64ce 100644 --- a/gix-index/src/extension/end_of_index_entry/decode.rs +++ b/gix-index/src/extension/end_of_index_entry/decode.rs @@ -13,10 +13,10 @@ use crate::{ /// stored prior to this one to assure they are correct. /// /// If the checksum wasn't matched, we will ignore this extension entirely. -pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option { +pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Result, gix_hash::hasher::Error> { let hash_len = object_hash.len_in_bytes(); if data.len() < MIN_SIZE_WITH_HEADER + hash_len { - return None; + return Ok(None); } let start_of_eoie = data.len() - MIN_SIZE_WITH_HEADER - hash_len; @@ -24,16 +24,19 @@ pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option { let (signature, ext_size, ext_data) = extension::decode::header(ext_data); if signature != SIGNATURE || ext_size as usize != MIN_SIZE { - return None; + return Ok(None); } let (offset, checksum) = ext_data.split_at(4); + let Ok(checksum) = gix_hash::oid::try_from_bytes(checksum) else { + return Ok(None); + }; let offset = from_be_u32(offset) as usize; - if offset < header::SIZE || offset > start_of_eoie || checksum.len() != gix_hash::Kind::Sha1.len_in_bytes() { - return None; + if offset < header::SIZE || offset > start_of_eoie || checksum.kind() != object_hash { + return Ok(None); } - let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1); + let mut hasher = gix_hash::hasher(object_hash); let mut last_chunk = None; for (signature, chunk) in extension::Iter::new(&data[offset..data.len() - MIN_SIZE_WITH_HEADER - hash_len]) { hasher.update(&signature); @@ -41,13 +44,13 @@ pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option { last_chunk = Some(chunk); } - if hasher.digest() != checksum { - return None; + if hasher.try_finalize()?.verify(checksum).is_err() { + return Ok(None); } // The last-to-this chunk ends where ours starts if last_chunk.map_or(true, |s| s.as_ptr_range().end != (&data[start_of_eoie]) as *const _) { - return None; + return Ok(None); } - Some(offset) + Ok(Some(offset)) } diff --git a/gix-index/src/extension/end_of_index_entry/write.rs b/gix-index/src/extension/end_of_index_entry/write.rs index 6752bd23866..649edae4c2d 100644 --- a/gix-index/src/extension/end_of_index_entry/write.rs +++ b/gix-index/src/extension/end_of_index_entry/write.rs @@ -11,19 +11,19 @@ pub fn write_to( hash_kind: gix_hash::Kind, offset_to_extensions: u32, prior_extensions: impl IntoIterator, -) -> Result<(), std::io::Error> { +) -> Result<(), gix_hash::io::Error> { out.write_all(&SIGNATURE)?; let extension_size: u32 = 4 + hash_kind.len_in_bytes() as u32; out.write_all(&extension_size.to_be_bytes())?; out.write_all(&offset_to_extensions.to_be_bytes())?; - let mut hasher = gix_features::hash::hasher(hash_kind); + let mut hasher = gix_hash::hasher(hash_kind); for (signature, size) in prior_extensions { hasher.update(&signature); hasher.update(&size.to_be_bytes()); } - out.write_all(&hasher.digest())?; + out.write_all(hasher.try_finalize()?.as_slice())?; Ok(()) } diff --git a/gix-index/src/file/init.rs b/gix-index/src/file/init.rs index 10c01015fcb..29419e241fc 100644 --- a/gix-index/src/file/init.rs +++ b/gix-index/src/file/init.rs @@ -75,20 +75,19 @@ impl File { let _span = gix_features::trace::detail!("gix::open_index::hash_index", path = ?path); let meta = file.metadata()?; let num_bytes_to_hash = meta.len() - object_hash.len_in_bytes() as u64; - let actual_hash = gix_features::hash::bytes( + gix_hash::bytes( &mut file, num_bytes_to_hash, object_hash, &mut gix_features::progress::Discard, &Default::default(), - )?; - - if actual_hash != expected { - return Err(Error::Decode(decode::Error::ChecksumMismatch { - actual_checksum: actual_hash, - expected_checksum: expected, - })); - } + ) + .map_err(|err| match err { + gix_hash::io::Error::Io(err) => Error::Io(err), + gix_hash::io::Error::Hasher(err) => Error::Decode(err.into()), + })? + .verify(&expected) + .map_err(decode::Error::from)?; } } diff --git a/gix-index/src/file/verify.rs b/gix-index/src/file/verify.rs index 1c0dc7539b6..ee3be86cc93 100644 --- a/gix-index/src/file/verify.rs +++ b/gix-index/src/file/verify.rs @@ -8,12 +8,9 @@ mod error { #[allow(missing_docs)] pub enum Error { #[error("Could not read index file to generate hash")] - Io(#[from] std::io::Error), - #[error("Index checksum should have been {expected}, but was {actual}")] - ChecksumMismatch { - actual: gix_hash::ObjectId, - expected: gix_hash::ObjectId, - }, + Io(#[from] gix_hash::io::Error), + #[error("Index checksum mismatch")] + Verify(#[from] gix_hash::verify::Error), } } pub use error::Error; @@ -23,21 +20,18 @@ impl File { pub fn verify_integrity(&self) -> Result<(), Error> { let _span = gix_features::trace::coarse!("gix_index::File::verify_integrity()"); if let Some(checksum) = self.checksum { - let num_bytes_to_hash = self.path.metadata()?.len() - checksum.as_bytes().len() as u64; + let num_bytes_to_hash = + self.path.metadata().map_err(gix_hash::io::Error::from)?.len() - checksum.as_bytes().len() as u64; let should_interrupt = AtomicBool::new(false); - let actual = gix_features::hash::bytes_of_file( + gix_hash::bytes_of_file( &self.path, num_bytes_to_hash, checksum.kind(), &mut gix_features::progress::Discard, &should_interrupt, - )?; - (actual == checksum).then_some(()).ok_or(Error::ChecksumMismatch { - actual, - expected: checksum, - }) - } else { - Ok(()) + )? + .verify(&checksum)?; } + Ok(()) } } diff --git a/gix-index/src/file/write.rs b/gix-index/src/file/write.rs index 0623f6c59c3..87fd2e2d1b4 100644 --- a/gix-index/src/file/write.rs +++ b/gix-index/src/file/write.rs @@ -1,5 +1,3 @@ -use gix_features::hash; - use crate::{write, File, Version}; /// The error produced by [`File::write()`]. @@ -7,7 +5,7 @@ use crate::{write, File, Version}; #[allow(missing_docs)] pub enum Error { #[error(transparent)] - Io(#[from] std::io::Error), + Io(#[from] gix_hash::io::Error), #[error("Could not acquire lock for index file")] AcquireLock(#[from] gix_lock::acquire::Error), #[error("Could not commit lock for index file")] @@ -21,17 +19,17 @@ impl File { &self, mut out: impl std::io::Write, options: write::Options, - ) -> std::io::Result<(Version, gix_hash::ObjectId)> { + ) -> Result<(Version, gix_hash::ObjectId), gix_hash::io::Error> { let _span = gix_features::trace::detail!("gix_index::File::write_to()", skip_hash = options.skip_hash); let (version, hash) = if options.skip_hash { let out: &mut dyn std::io::Write = &mut out; let version = self.state.write_to(out, options)?; (version, self.state.object_hash.null()) } else { - let mut hasher = hash::Write::new(&mut out, self.state.object_hash); + let mut hasher = gix_hash::io::Write::new(&mut out, self.state.object_hash); let out: &mut dyn std::io::Write = &mut hasher; let version = self.state.write_to(out, options)?; - (version, gix_hash::ObjectId::from(hasher.hash.digest())) + (version, hasher.hash.try_finalize()?) }; out.write_all(hash.as_slice())?; Ok((version, hash)) @@ -49,7 +47,7 @@ impl File { let (version, digest) = self.write_to(&mut lock, options)?; match lock.into_inner() { Ok(lock) => lock.commit()?, - Err(err) => return Err(err.into_error().into()), + Err(err) => return Err(Error::Io(err.into_error().into())), }; self.state.version = version; self.checksum = Some(digest); diff --git a/gix-index/src/write.rs b/gix-index/src/write.rs index e0b9554c220..e0edbb0eb89 100644 --- a/gix-index/src/write.rs +++ b/gix-index/src/write.rs @@ -67,7 +67,7 @@ impl State { extensions, skip_hash: _, }: Options, - ) -> std::io::Result { + ) -> Result { let _span = gix_features::trace::detail!("gix_index::State::write()"); let version = self.detect_required_version(); diff --git a/gix-index/tests/Cargo.toml b/gix-index/tests/Cargo.toml index 8d707b1ed80..85356da7dd2 100644 --- a/gix-index/tests/Cargo.toml +++ b/gix-index/tests/Cargo.toml @@ -20,7 +20,7 @@ gix-features-parallel = ["gix-features/parallel"] [dev-dependencies] gix-index = { path = ".." } -gix-features = { path = "../../gix-features", features = ["rustsha1", "progress"] } +gix-features = { path = "../../gix-features", features = ["progress"] } gix-testtools = { path = "../../tests/tools" } gix-odb = { path = "../../gix-odb" } gix-object = { path = "../../gix-object" } diff --git a/gix-index/tests/index/file/read.rs b/gix-index/tests/index/file/read.rs index e514934b04a..df85fbff492 100644 --- a/gix-index/tests/index/file/read.rs +++ b/gix-index/tests/index/file/read.rs @@ -328,7 +328,7 @@ fn v2_split_index_recursion_is_handled_gracefully() { let err = try_file("v2_split_index_recursive").expect_err("recursion fails gracefully"); assert!(matches!( err, - gix_index::file::init::Error::Decode(gix_index::decode::Error::ChecksumMismatch { .. }) + gix_index::file::init::Error::Decode(gix_index::decode::Error::Verify(_)) )); } diff --git a/gix-merge/tests/merge/blob/mod.rs b/gix-merge/tests/merge/blob/mod.rs index 57d9205d79a..580ef7dfeb4 100644 --- a/gix-merge/tests/merge/blob/mod.rs +++ b/gix-merge/tests/merge/blob/mod.rs @@ -43,10 +43,10 @@ mod util { impl ObjectDb { /// Insert `data` and return its hash. That can be used to find it again. - pub fn insert(&mut self, data: &str) -> gix_hash::ObjectId { - let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes()); + pub fn insert(&mut self, data: &str) -> Result { + let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes())?; self.data_by_id.insert(id, data.into()); - id + Ok(id) } } } diff --git a/gix-merge/tests/merge/blob/pipeline.rs b/gix-merge/tests/merge/blob/pipeline.rs index 080a9d601f6..86c856808db 100644 --- a/gix-merge/tests/merge/blob/pipeline.rs +++ b/gix-merge/tests/merge/blob/pipeline.rs @@ -67,7 +67,7 @@ fn without_transformation() -> crate::Result { let mut db = ObjectDb::default(); let b_content = "b-content"; - let id = db.insert(b_content); + let id = db.insert(b_content)?; let out = filter.convert_to_mergeable( &id, @@ -148,7 +148,7 @@ fn binary_below_large_file_threshold() -> crate::Result { assert_eq!(buf.as_bstr(), binary_content); let mut db = ObjectDb::default(); - let id = db.insert(binary_content); + let id = db.insert(binary_content)?; let out = filter.convert_to_mergeable( &id, EntryKind::Blob, @@ -203,7 +203,7 @@ fn above_large_file_threshold() -> crate::Result { drop(tmp); let mut db = ObjectDb::default(); - let id = db.insert(large_content); + let id = db.insert(large_content)?; let out = filter.convert_to_mergeable( &id, @@ -352,7 +352,7 @@ fn worktree_filter() -> crate::Result { "worktree files need to be converted back to what's stored in Git" ); - let id = db.insert(a_content); + let id = db.insert(a_content)?; let out = filter.convert_to_mergeable( &id, EntryKind::Blob, @@ -385,7 +385,7 @@ fn worktree_filter() -> crate::Result { drop(tmp); let b_content = "b-content\n"; - let id = db.insert(b_content); + let id = db.insert(b_content)?; let out = filter.convert_to_mergeable( &id, @@ -403,7 +403,7 @@ fn worktree_filter() -> crate::Result { let mut db = ObjectDb::default(); let b_content = "b-content\r\n"; - let id = db.insert(b_content); + let id = db.insert(b_content)?; let out = filter.convert_to_mergeable( &id, EntryKind::Blob, diff --git a/gix-merge/tests/merge/blob/platform.rs b/gix-merge/tests/merge/blob/platform.rs index 75aa716e83c..7bb38ed305e 100644 --- a/gix-merge/tests/merge/blob/platform.rs +++ b/gix-merge/tests/merge/blob/platform.rs @@ -31,7 +31,7 @@ mod merge { ("ours", ResourceKind::CurrentOrOurs), ("theirs\0", ResourceKind::OtherOrTheirs), ] { - let id = db.insert(content); + let id = db.insert(content)?; platform.set_resource( id, EntryKind::Blob, @@ -81,7 +81,7 @@ mod merge { ("any\0", ResourceKind::CurrentOrOurs), ("any\0", ResourceKind::OtherOrTheirs), ] { - let id = db.insert(content); + let id = db.insert(content)?; platform.set_resource( id, EntryKind::Blob, @@ -131,7 +131,7 @@ mod merge { ("ours", ResourceKind::CurrentOrOurs), ("theirs", ResourceKind::OtherOrTheirs), ] { - let id = db.insert(content); + let id = db.insert(content)?; platform.set_resource(id, EntryKind::Blob, "b".into(), kind, &db)?; } @@ -284,7 +284,7 @@ theirs ("ours", ResourceKind::CurrentOrOurs), ("theirs", ResourceKind::OtherOrTheirs), ] { - let id = db.insert(content); + let id = db.insert(content)?; platform.set_resource(id, EntryKind::Blob, "b".into(), kind, &db)?; } @@ -313,7 +313,7 @@ theirs "we handle word-splitting and definitely pick-up what's written into the %A buffer" ); - let id = db.insert("binary\0"); + let id = db.insert("binary\0")?; platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::OtherOrTheirs, &db)?; let platform_ref = platform.prepare_merge(&db, Default::default())?; let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; diff --git a/gix-object/Cargo.toml b/gix-object/Cargo.toml index f05f05fc592..e6713b15a6d 100644 --- a/gix-object/Cargo.toml +++ b/gix-object/Cargo.toml @@ -42,7 +42,6 @@ verbose-object-parsing-errors = ["winnow/std"] [dependencies] gix-features = { version = "^0.40.0", path = "../gix-features", features = [ - "rustsha1", "progress", ] } gix-hash = { version = "^0.16.0", path = "../gix-hash" } diff --git a/gix-object/benches/edit_tree.rs b/gix-object/benches/edit_tree.rs index 41f1e5f9839..8154a7c1c76 100644 --- a/gix-object/benches/edit_tree.rs +++ b/gix-object/benches/edit_tree.rs @@ -136,7 +136,7 @@ criterion_main!(benches); type TreeStore = Rc>>; -fn new_inmemory_writes() -> (TreeStore, impl FnMut(&Tree) -> Result) { +fn new_inmemory_writes() -> (TreeStore, impl FnMut(&Tree) -> Result) { let store = TreeStore::default(); let write_tree = { let store = store.clone(); @@ -144,11 +144,7 @@ fn new_inmemory_writes() -> (TreeStore, impl FnMut(&Tree) -> Result {} diff --git a/gix-object/src/data.rs b/gix-object/src/data.rs index cea807684d9..1f733263811 100644 --- a/gix-object/src/data.rs +++ b/gix-object/src/data.rs @@ -51,31 +51,24 @@ impl<'a> Data<'a> { /// Types supporting object hash verification pub mod verify { - /// Returned by [`crate::Data::verify_checksum()`] #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { - #[error("Object expected to have id {desired}, but actual id was {actual}")] - ChecksumMismatch { - desired: gix_hash::ObjectId, - actual: gix_hash::ObjectId, - }, + #[error("Failed to hash object")] + Hasher(#[from] gix_hash::hasher::Error), + #[error(transparent)] + Verify(#[from] gix_hash::verify::Error), } impl crate::Data<'_> { - /// Compute the checksum of `self` and compare it with the `desired` hash. + /// Compute the checksum of `self` and compare it with the `expected` hash. /// If the hashes do not match, an [`Error`] is returned, containing the actual /// hash of `self`. - pub fn verify_checksum(&self, desired: &gix_hash::oid) -> Result<(), Error> { - let actual_id = crate::compute_hash(desired.kind(), self.kind, self.data); - if desired != actual_id { - return Err(Error::ChecksumMismatch { - desired: desired.into(), - actual: actual_id, - }); - } - Ok(()) + pub fn verify_checksum(&self, expected: &gix_hash::oid) -> Result { + let actual = crate::compute_hash(expected.kind(), self.kind, self.data)?; + actual.verify(expected)?; + Ok(actual) } } } diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index 398374bee16..7e0f52d28fa 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -398,16 +398,22 @@ pub mod decode { } } +fn object_hasher(hash_kind: gix_hash::Kind, object_kind: Kind, object_size: u64) -> gix_hash::Hasher { + let mut hasher = gix_hash::hasher(hash_kind); + hasher.update(&encode::loose_header(object_kind, object_size)); + hasher +} + /// A function to compute a hash of kind `hash_kind` for an object of `object_kind` and its `data`. #[doc(alias = "hash_object", alias = "git2")] -pub fn compute_hash(hash_kind: gix_hash::Kind, object_kind: Kind, data: &[u8]) -> gix_hash::ObjectId { - let header = encode::loose_header(object_kind, data.len() as u64); - - let mut hasher = gix_features::hash::hasher(hash_kind); - hasher.update(&header); +pub fn compute_hash( + hash_kind: gix_hash::Kind, + object_kind: Kind, + data: &[u8], +) -> Result { + let mut hasher = object_hasher(hash_kind, object_kind, data.len() as u64); hasher.update(data); - - hasher.digest().into() + hasher.try_finalize() } /// A function to compute a hash of kind `hash_kind` for an object of `object_kind` and its data read from `stream` @@ -422,10 +428,7 @@ pub fn compute_stream_hash( stream_len: u64, progress: &mut dyn gix_features::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, -) -> std::io::Result { - let header = encode::loose_header(object_kind, stream_len); - let mut hasher = gix_features::hash::hasher(hash_kind); - - hasher.update(&header); - gix_features::hash::bytes_with_hasher(stream, stream_len, hasher, progress, should_interrupt) +) -> Result { + let hasher = object_hasher(hash_kind, object_kind, stream_len); + gix_hash::bytes_with_hasher(stream, stream_len, hasher, progress, should_interrupt) } diff --git a/gix-object/tests/object/main.rs b/gix-object/tests/object/main.rs index 2ff45a19081..b58d6118c18 100644 --- a/gix-object/tests/object/main.rs +++ b/gix-object/tests/object/main.rs @@ -12,11 +12,11 @@ mod tree; fn compute_hash() { let hk = gix_hash::Kind::Sha1; assert_eq!( - gix_object::compute_hash(hk, gix_object::Kind::Blob, &[]), + gix_object::compute_hash(hk, gix_object::Kind::Blob, &[]).expect("empty hash doesn’t collide"), gix_hash::ObjectId::empty_blob(hk) ); assert_eq!( - gix_object::compute_hash(hk, gix_object::Kind::Tree, &[]), + gix_object::compute_hash(hk, gix_object::Kind::Tree, &[]).expect("empty hash doesn’t collide"), gix_hash::ObjectId::empty_tree(hk) ); } diff --git a/gix-object/tests/object/tree/editor.rs b/gix-object/tests/object/tree/editor.rs index bd9ca96fd80..7f7bc6ccc11 100644 --- a/gix-object/tests/object/tree/editor.rs +++ b/gix-object/tests/object/tree/editor.rs @@ -793,7 +793,7 @@ mod utils { pub(super) fn new_inmemory_writes() -> ( TreeStore, - impl FnMut(&Tree) -> Result, + impl FnMut(&Tree) -> Result, impl Fn() -> usize, ) { let store = TreeStore::default(); @@ -805,11 +805,7 @@ mod utils { move |tree: &Tree| { buf.clear(); tree.write_to(&mut buf)?; - let header = gix_object::encode::loose_header(gix_object::Kind::Tree, buf.len() as u64); - let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1); - hasher.update(&header); - hasher.update(&buf); - let id = hasher.digest().into(); + let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Tree, &buf)?; store.borrow_mut().insert(id, tree.clone()); let old = num_writes.get(); num_writes.set(old + 1); diff --git a/gix-odb/Cargo.toml b/gix-odb/Cargo.toml index 6215d48ed8c..70ac0492509 100644 --- a/gix-odb/Cargo.toml +++ b/gix-odb/Cargo.toml @@ -20,7 +20,7 @@ doctest = false serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"] [dependencies] -gix-features = { version = "^0.40.0", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] } +gix-features = { version = "^0.40.0", path = "../gix-features", features = ["walkdir", "zlib", "crc32"] } gix-hashtable = { version = "^0.7.0", path = "../gix-hashtable" } gix-hash = { version = "^0.16.0", path = "../gix-hash" } gix-date = { version = "^0.9.3", path = "../gix-date" } diff --git a/gix-odb/src/memory.rs b/gix-odb/src/memory.rs index 6d5f405e7cb..4058b47e609 100644 --- a/gix-odb/src/memory.rs +++ b/gix-odb/src/memory.rs @@ -219,7 +219,7 @@ where let mut buf = Vec::new(); from.read_to_end(&mut buf)?; - let id = gix_object::compute_hash(self.object_hash, kind, &buf); + let id = gix_object::compute_hash(self.object_hash, kind, &buf)?; map.borrow_mut().insert(id, (kind, buf)); Ok(id) } diff --git a/gix-odb/src/sink.rs b/gix-odb/src/sink.rs index 46077ff37cf..c06df83631c 100644 --- a/gix-odb/src/sink.rs +++ b/gix-odb/src/sink.rs @@ -36,7 +36,7 @@ impl gix_object::Write for Sink { Ok(()) }; - let mut hasher = gix_features::hash::hasher(self.object_hash); + let mut hasher = gix_hash::hasher(self.object_hash); hasher.update(&header); possibly_compress(&header).map_err(Box::new)?; @@ -53,6 +53,6 @@ impl gix_object::Write for Sink { c.reset(); } - Ok(hasher.digest().into()) + Ok(hasher.try_finalize()?) } } diff --git a/gix-odb/src/store_impls/loose/verify.rs b/gix-odb/src/store_impls/loose/verify.rs index dd08e9cd9f2..afe3aa27887 100644 --- a/gix-odb/src/store_impls/loose/verify.rs +++ b/gix-odb/src/store_impls/loose/verify.rs @@ -19,12 +19,19 @@ pub mod integrity { kind: gix_object::Kind, id: gix_hash::ObjectId, }, - #[error("{kind} object {expected} wasn't re-encoded without change - new hash is {actual}")] - ObjectHashMismatch { + #[error("{kind} object {expected} could not be hashed")] + ObjectHasher { + #[source] + source: gix_hash::hasher::Error, kind: gix_object::Kind, - actual: gix_hash::ObjectId, expected: gix_hash::ObjectId, }, + #[error("{kind} object wasn't re-encoded without change")] + ObjectEncodeMismatch { + #[source] + source: gix_hash::verify::Error, + kind: gix_object::Kind, + }, #[error("Objects were deleted during iteration - try again")] Retry, #[error("Interrupted")] @@ -77,14 +84,17 @@ impl Store { .try_find(&id, &mut buf) .map_err(|_| integrity::Error::Retry)? .ok_or(integrity::Error::Retry)?; - let actual_id = sink.write_buf(object.kind, object.data).expect("sink never fails"); - if actual_id != id { - return Err(integrity::Error::ObjectHashMismatch { + sink.write_buf(object.kind, object.data) + .map_err(|err| integrity::Error::ObjectHasher { + source: *err.downcast().expect("sink can only fail in hasher"), kind: object.kind, - actual: actual_id, expected: id, - }); - } + })? + .verify(&id) + .map_err(|err| integrity::Error::ObjectEncodeMismatch { + source: err, + kind: object.kind, + })?; object.decode().map_err(|err| integrity::Error::ObjectDecode { source: err, kind: object.kind, diff --git a/gix-odb/src/store_impls/loose/write.rs b/gix-odb/src/store_impls/loose/write.rs index bdef0b90005..d94c16556bd 100644 --- a/gix-odb/src/store_impls/loose/write.rs +++ b/gix-odb/src/store_impls/loose/write.rs @@ -1,6 +1,6 @@ use std::{fs, io, io::Write, path::PathBuf}; -use gix_features::{hash, zlib::stream::deflate}; +use gix_features::zlib::stream::deflate; use gix_object::WriteTo; use tempfile::NamedTempFile; @@ -13,7 +13,7 @@ use crate::store_impls::loose; pub enum Error { #[error("Could not {message} '{path}'")] Io { - source: io::Error, + source: gix_hash::io::Error, message: &'static str, path: PathBuf, }, @@ -30,12 +30,12 @@ impl gix_object::Write for Store { fn write(&self, object: &dyn WriteTo) -> Result { let mut to = self.dest()?; to.write_all(&object.loose_header()).map_err(|err| Error::Io { - source: err, + source: err.into(), message: "write header to tempfile in", path: self.path.to_owned(), })?; object.write_to(&mut to).map_err(|err| Error::Io { - source: err, + source: err.into(), message: "stream all data into tempfile in", path: self.path.to_owned(), })?; @@ -50,13 +50,13 @@ impl gix_object::Write for Store { let mut to = self.dest().map_err(Box::new)?; to.write_all(&gix_object::encode::loose_header(kind, from.len() as u64)) .map_err(|err| Error::Io { - source: err, + source: err.into(), message: "write header to tempfile in", path: self.path.to_owned(), })?; to.write_all(from).map_err(|err| Error::Io { - source: err, + source: err.into(), message: "stream all data into tempfile in", path: self.path.to_owned(), })?; @@ -76,14 +76,14 @@ impl gix_object::Write for Store { let mut to = self.dest().map_err(Box::new)?; to.write_all(&gix_object::encode::loose_header(kind, size)) .map_err(|err| Error::Io { - source: err, + source: err.into(), message: "write header to tempfile in", path: self.path.to_owned(), })?; io::copy(&mut from, &mut to) .map_err(|err| Error::Io { - source: err, + source: err.into(), message: "stream all data into tempfile in", path: self.path.to_owned(), }) @@ -106,7 +106,7 @@ impl Store { } impl Store { - fn dest(&self) -> Result, Error> { + fn dest(&self) -> Result, Error> { #[cfg_attr(not(unix), allow(unused_mut))] let mut builder = tempfile::Builder::new(); #[cfg(unix)] @@ -115,9 +115,9 @@ impl Store { let perms = std::fs::Permissions::from_mode(0o444); builder.permissions(perms); } - Ok(hash::Write::new( + Ok(gix_hash::io::Write::new( deflate::Write::new(builder.tempfile_in(&self.path).map_err(|err| Error::Io { - source: err, + source: err.into(), message: "create named temp file in", path: self.path.to_owned(), })?), @@ -127,9 +127,13 @@ impl Store { fn finalize_object( &self, - hash::Write { hash, inner: file }: hash::Write, + gix_hash::io::Write { hash, inner: file }: gix_hash::io::Write, ) -> Result { - let id = gix_hash::ObjectId::from(hash.digest()); + let id = hash.try_finalize().map_err(|err| Error::Io { + source: err.into(), + message: "hash tempfile in", + path: self.path.to_owned(), + })?; let object_path = loose::hash_path(&id, self.path.clone()); let object_dir = object_path .parent() diff --git a/gix-pack/Cargo.toml b/gix-pack/Cargo.toml index 2552f95f742..e6449a3e64e 100644 --- a/gix-pack/Cargo.toml +++ b/gix-pack/Cargo.toml @@ -34,7 +34,7 @@ serde = ["dep:serde", "gix-object/serde"] wasm = ["gix-diff?/wasm"] [dependencies] -gix-features = { version = "^0.40.0", path = "../gix-features", features = ["crc32", "rustsha1", "progress", "zlib"] } +gix-features = { version = "^0.40.0", path = "../gix-features", features = ["crc32", "progress", "zlib"] } gix-path = { version = "^0.10.14", path = "../gix-path" } gix-hash = { version = "^0.16.0", path = "../gix-hash" } gix-chunk = { version = "^0.4.11", path = "../gix-chunk" } diff --git a/gix-pack/src/data/input/bytes_to_entries.rs b/gix-pack/src/data/input/bytes_to_entries.rs index f8f7aa926bd..7f54977504c 100644 --- a/gix-pack/src/data/input/bytes_to_entries.rs +++ b/gix-pack/src/data/input/bytes_to_entries.rs @@ -1,7 +1,7 @@ use std::{fs, io}; -use gix_features::{hash::Hasher, zlib::Decompress}; -use gix_hash::ObjectId; +use gix_features::zlib::Decompress; +use gix_hash::{Hasher, ObjectId}; use crate::data::input; @@ -52,7 +52,7 @@ where object_hash: gix_hash::Kind, ) -> Result, input::Error> { let mut header_data = [0u8; 12]; - read.read_exact(&mut header_data)?; + read.read_exact(&mut header_data).map_err(gix_hash::io::Error::from)?; let (version, num_objects) = crate::data::header::decode(&header_data)?; assert_eq!( @@ -69,7 +69,7 @@ where version, objects_left: num_objects, hash: (mode != input::Mode::AsIs).then(|| { - let mut hash = gix_features::hash::hasher(object_hash); + let mut hash = gix_hash::hasher(object_hash); hash.update(&header_data); hash }), @@ -97,7 +97,7 @@ where } None => crate::data::Entry::from_read(&mut self.read, self.offset, self.hash_len), } - .map_err(input::Error::from)?; + .map_err(gix_hash::io::Error::from)?; // Decompress object to learn its compressed bytes let compressed_buf = self.compressed_buf.take().unwrap_or_else(|| Vec::with_capacity(4096)); @@ -114,7 +114,7 @@ where decompressor: &mut self.decompressor, }; - let bytes_copied = io::copy(&mut decompressed_reader, &mut io::sink())?; + let bytes_copied = io::copy(&mut decompressed_reader, &mut io::sink()).map_err(gix_hash::io::Error::from)?; if bytes_copied != entry.decompressed_size { return Err(input::Error::IncompletePack { actual: bytes_copied, @@ -138,7 +138,10 @@ where let crc32 = if self.compressed.crc32() { let mut header_buf = [0u8; 12 + gix_hash::Kind::longest().len_in_bytes()]; - let header_len = entry.header.write_to(bytes_copied, &mut header_buf.as_mut())?; + let header_len = entry + .header + .write_to(bytes_copied, &mut header_buf.as_mut()) + .map_err(gix_hash::io::Error::from)?; let state = gix_features::hash::crc32_update(0, &header_buf[..header_len]); Some(gix_features::hash::crc32_update(state, &compressed)) } else { @@ -172,26 +175,22 @@ where let mut id = gix_hash::ObjectId::null(self.object_hash); if let Err(err) = self.read.read_exact(id.as_mut_slice()) { if self.mode != input::Mode::Restore { - return Err(err.into()); + return Err(input::Error::Io(err.into())); } } if let Some(hash) = self.hash.take() { - let actual_id = gix_hash::ObjectId::from(hash.digest()); + let actual_id = hash.try_finalize().map_err(gix_hash::io::Error::from)?; if self.mode == input::Mode::Restore { id = actual_id; - } - if id != actual_id { - return Err(input::Error::ChecksumMismatch { - actual: actual_id, - expected: id, - }); + } else { + actual_id.verify(&id)?; } } Some(id) } else if self.mode == input::Mode::Restore { let hash = self.hash.clone().expect("in restore mode a hash is set"); - Some(gix_hash::ObjectId::from(hash.digest())) + Some(hash.try_finalize().map_err(gix_hash::io::Error::from)?) } else { None }) @@ -273,7 +272,8 @@ where impl crate::data::File { /// Returns an iterator over [`Entries`][crate::data::input::Entry], without making use of the memory mapping. pub fn streaming_iter(&self) -> Result, input::Error> { - let reader = io::BufReader::with_capacity(4096 * 8, fs::File::open(&self.path)?); + let reader = + io::BufReader::with_capacity(4096 * 8, fs::File::open(&self.path).map_err(gix_hash::io::Error::from)?); BytesToEntriesIter::new_from_header( reader, input::Mode::Verify, diff --git a/gix-pack/src/data/input/entries_to_bytes.rs b/gix-pack/src/data/input/entries_to_bytes.rs index dc2ac58848a..1cf308a6d89 100644 --- a/gix-pack/src/data/input/entries_to_bytes.rs +++ b/gix-pack/src/data/input/entries_to_bytes.rs @@ -1,7 +1,5 @@ use std::iter::Peekable; -use gix_features::hash; - use crate::data::input; /// An implementation of [`Iterator`] to write [encoded entries][input::Entry] to an inner implementation each time @@ -66,7 +64,7 @@ where self.trailer } - fn next_inner(&mut self, entry: input::Entry) -> Result { + fn next_inner(&mut self, entry: input::Entry) -> Result { if self.num_entries == 0 { let header_bytes = crate::data::header::encode(self.data_version, 0); self.output.write_all(&header_bytes[..])?; @@ -82,7 +80,7 @@ where Ok(entry) } - fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), input::Error> { + fn write_header_and_digest(&mut self, last_entry: Option<&mut input::Entry>) -> Result<(), gix_hash::io::Error> { let header_bytes = crate::data::header::encode(self.data_version, self.num_entries); let num_bytes_written = if last_entry.is_some() { self.output.stream_position()? @@ -95,7 +93,7 @@ where self.output.rewind()?; let interrupt_never = std::sync::atomic::AtomicBool::new(false); - let digest = hash::bytes( + let digest = gix_hash::bytes( &mut self.output, num_bytes_written, self.object_hash, @@ -129,13 +127,16 @@ where match self.input.next() { Some(res) => Some(match res { - Ok(entry) => self.next_inner(entry).and_then(|mut entry| { - if self.input.peek().is_none() { - self.write_header_and_digest(Some(&mut entry)).map(|_| entry) - } else { - Ok(entry) - } - }), + Ok(entry) => self + .next_inner(entry) + .and_then(|mut entry| { + if self.input.peek().is_none() { + self.write_header_and_digest(Some(&mut entry)).map(|_| entry) + } else { + Ok(entry) + } + }) + .map_err(input::Error::from), Err(err) => { self.is_done = true; Err(err) @@ -143,7 +144,7 @@ where }), None => match self.write_header_and_digest(None) { Ok(_) => None, - Err(err) => Some(Err(err)), + Err(err) => Some(Err(err.into())), }, } } diff --git a/gix-pack/src/data/input/entry.rs b/gix-pack/src/data/input/entry.rs index ad970aa0376..55fb439afc5 100644 --- a/gix-pack/src/data/input/entry.rs +++ b/gix-pack/src/data/input/entry.rs @@ -54,7 +54,7 @@ fn compress_data(obj: &gix_object::Data<'_>) -> Result, input::Error> { let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new()); if let Err(err) = std::io::copy(&mut &*obj.data, &mut out) { match err.kind() { - std::io::ErrorKind::Other => return Err(input::Error::Io(err)), + std::io::ErrorKind::Other => return Err(input::Error::Io(err.into())), err => { unreachable!("Should never see other errors than zlib, but got {:?}", err) } diff --git a/gix-pack/src/data/input/types.rs b/gix-pack/src/data/input/types.rs index 86852479faa..46a694a217b 100644 --- a/gix-pack/src/data/input/types.rs +++ b/gix-pack/src/data/input/types.rs @@ -1,19 +1,14 @@ -use std::io; - /// Returned by [`BytesToEntriesIter::new_from_header()`][crate::data::input::BytesToEntriesIter::new_from_header()] and as part /// of `Item` of [`BytesToEntriesIter`][crate::data::input::BytesToEntriesIter]. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("An IO operation failed while streaming an entry")] - Io(#[from] io::Error), + Io(#[from] gix_hash::io::Error), #[error(transparent)] PackParse(#[from] crate::data::header::decode::Error), - #[error("pack checksum in trailer was {expected}, but actual checksum was {actual}")] - ChecksumMismatch { - expected: gix_hash::ObjectId, - actual: gix_hash::ObjectId, - }, + #[error("Failed to verify pack checksum in trailer")] + Verify(#[from] gix_hash::verify::Error), #[error("pack is incomplete: it was decompressed into {actual} bytes but {expected} bytes where expected.")] IncompletePack { actual: u64, expected: u64 }, #[error("The object {object_id} could not be decoded or wasn't found")] diff --git a/gix-pack/src/data/output/bytes.rs b/gix-pack/src/data/output/bytes.rs index 849da6af6cf..39f7a1d8a05 100644 --- a/gix-pack/src/data/output/bytes.rs +++ b/gix-pack/src/data/output/bytes.rs @@ -1,7 +1,5 @@ use std::io::Write; -use gix_features::hash; - use crate::data::output; use crate::exact_vec; @@ -13,7 +11,7 @@ where E: std::error::Error + 'static, { #[error(transparent)] - Io(#[from] std::io::Error), + Io(#[from] gix_hash::io::Error), #[error(transparent)] Input(E), } @@ -24,7 +22,7 @@ pub struct FromEntriesIter { /// An iterator for input [`output::Entry`] instances pub input: I, /// A way of writing encoded bytes. - output: hash::Write, + output: gix_hash::io::Write, /// Our trailing hash when done writing all input entries trailer: Option, /// The amount of objects in the iteration and the version of the packfile to be written. @@ -71,7 +69,7 @@ where ); FromEntriesIter { input, - output: hash::Write::new(output, object_hash), + output: gix_hash::io::Write::new(output, object_hash), trailer: None, entry_version: version, pack_offsets_and_validity: exact_vec(num_entries as usize), @@ -98,7 +96,9 @@ where let previous_written = self.written; if let Some((version, num_entries)) = self.header_info.take() { let header_bytes = crate::data::header::encode(version, num_entries); - self.output.write_all(&header_bytes[..])?; + self.output + .write_all(&header_bytes[..]) + .map_err(gix_hash::io::Error::from)?; self.written += header_bytes.len() as u64; } match self.input.next() { @@ -116,17 +116,28 @@ where } self.written - base_offset }); - self.written += header.write_to(entry.decompressed_size as u64, &mut self.output)? as u64; - self.written += std::io::copy(&mut &*entry.compressed_data, &mut self.output)?; + self.written += header + .write_to(entry.decompressed_size as u64, &mut self.output) + .map_err(gix_hash::io::Error::from)? as u64; + self.written += std::io::copy(&mut &*entry.compressed_data, &mut self.output) + .map_err(gix_hash::io::Error::from)?; } } None => { - let digest = self.output.hash.clone().digest(); - self.output.inner.write_all(&digest[..])?; - self.written += digest.len() as u64; - self.output.inner.flush()?; + let digest = self + .output + .hash + .clone() + .try_finalize() + .map_err(gix_hash::io::Error::from)?; + self.output + .inner + .write_all(digest.as_slice()) + .map_err(gix_hash::io::Error::from)?; + self.written += digest.as_slice().len() as u64; + self.output.inner.flush().map_err(gix_hash::io::Error::from)?; self.is_done = true; - self.trailer = Some(gix_hash::ObjectId::from(digest)); + self.trailer = Some(digest); } } Ok(self.written - previous_written) diff --git a/gix-pack/src/index/encode.rs b/gix-pack/src/index/encode.rs index 61c6f6de77a..58f63cea954 100644 --- a/gix-pack/src/index/encode.rs +++ b/gix-pack/src/index/encode.rs @@ -36,13 +36,9 @@ pub(crate) fn fanout(iter: &mut dyn ExactSizeIterator) -> [u32; 256] mod function { use std::io; - use gix_features::{ - hash, - progress::{self, DynNestedProgress}, - }; - use super::{fanout, HIGH_BIT, LARGE_OFFSET_THRESHOLD}; use crate::index::V2_SIGNATURE; + use gix_features::progress::{self, DynNestedProgress}; struct Count { bytes: u64, @@ -76,7 +72,7 @@ mod function { pack_hash: &gix_hash::ObjectId, kind: crate::index::Version, progress: &mut dyn DynNestedProgress, - ) -> io::Result { + ) -> Result { use io::Write; assert_eq!(kind, crate::index::Version::V2, "Can only write V2 packs right now"); assert!( @@ -87,7 +83,7 @@ mod function { // Write header let mut out = Count::new(std::io::BufWriter::with_capacity( 8 * 4096, - hash::Write::new(out, kind.hash()), + gix_hash::io::Write::new(out, kind.hash()), )); out.write_all(V2_SIGNATURE)?; out.write_all(&(kind as u32).to_be_bytes())?; @@ -138,8 +134,8 @@ mod function { out.write_all(pack_hash.as_slice())?; let bytes_written_without_trailer = out.bytes; - let out = out.inner.into_inner()?; - let index_hash: gix_hash::ObjectId = out.hash.digest().into(); + let out = out.inner.into_inner().map_err(io::Error::from)?; + let index_hash = out.hash.try_finalize()?; out.inner.write_all(index_hash.as_slice())?; out.inner.flush()?; diff --git a/gix-pack/src/index/traverse/error.rs b/gix-pack/src/index/traverse/error.rs index f6cc7506548..a77b011af27 100644 --- a/gix-pack/src/index/traverse/error.rs +++ b/gix-pack/src/index/traverse/error.rs @@ -6,8 +6,8 @@ use crate::index; pub enum Error { #[error("One of the traversal processors failed")] Processor(#[source] E), - #[error("Index file, pack file or object verification failed")] - VerifyChecksum(#[from] index::verify::checksum::Error), + #[error("Failed to verify index file checksum")] + IndexVerify(#[source] index::verify::checksum::Error), #[error("The pack delta tree index could not be built")] Tree(#[from] crate::cache::delta::from_offsets::Error), #[error("The tree traversal failed")] @@ -20,17 +20,15 @@ pub enum Error { offset: u64, source: crate::data::decode::Error, }, - #[error("The packfiles checksum didn't match the index file checksum: expected {expected}, got {actual}")] - PackMismatch { - expected: gix_hash::ObjectId, - actual: gix_hash::ObjectId, - }, - #[error("The hash of {kind} object at offset {offset} didn't match the checksum in the index file: expected {expected}, got {actual}")] - PackObjectMismatch { - expected: gix_hash::ObjectId, - actual: gix_hash::ObjectId, + #[error("The packfiles checksum didn't match the index file checksum")] + PackMismatch(#[source] gix_hash::verify::Error), + #[error("Failed to verify pack file checksum")] + PackVerify(#[source] crate::verify::checksum::Error), + #[error("Error verifying object at offset {offset} against checksum in the index file")] + PackObjectVerify { offset: u64, - kind: gix_object::Kind, + #[source] + source: gix_object::data::verify::Error, }, #[error( "The CRC32 of {kind} object at offset {offset} didn't match the checksum in the index file: expected {expected}, got {actual}" diff --git a/gix-pack/src/index/traverse/mod.rs b/gix-pack/src/index/traverse/mod.rs index 50f0236b862..8f13774f0ab 100644 --- a/gix-pack/src/index/traverse/mod.rs +++ b/gix-pack/src/index/traverse/mod.rs @@ -126,18 +126,15 @@ impl index::File { E: std::error::Error + Send + Sync + 'static, { Ok(if check.file_checksum() { - if self.pack_checksum() != pack.checksum() { - return Err(Error::PackMismatch { - actual: pack.checksum(), - expected: self.pack_checksum(), - }); - } + pack.checksum() + .verify(&self.pack_checksum()) + .map_err(Error::PackMismatch)?; let (pack_res, id) = parallel::join( move || pack.verify_checksum(pack_progress, should_interrupt), move || self.verify_checksum(index_progress, should_interrupt), ); - pack_res?; - id? + pack_res.map_err(Error::PackVerify)?; + id.map_err(Error::IndexVerify)? } else { self.index_checksum() }) @@ -210,15 +207,12 @@ where E: std::error::Error + Send + Sync + 'static, { if check.object_checksum() { - let actual_oid = gix_object::compute_hash(index_entry.oid.kind(), object_kind, decompressed); - if actual_oid != index_entry.oid { - return Err(Error::PackObjectMismatch { - actual: actual_oid, - expected: index_entry.oid, + gix_object::Data::new(object_kind, decompressed) + .verify_checksum(&index_entry.oid) + .map_err(|source| Error::PackObjectVerify { offset: index_entry.pack_offset, - kind: object_kind, - }); - } + source, + })?; if let Some(desired_crc32) = index_entry.crc32 { let actual_crc32 = pack_entry_crc32(); if actual_crc32 != desired_crc32 { diff --git a/gix-pack/src/index/verify.rs b/gix-pack/src/index/verify.rs index c2c72f0faa5..c0ed279152a 100644 --- a/gix-pack/src/index/verify.rs +++ b/gix-pack/src/index/verify.rs @@ -220,7 +220,7 @@ impl index::File { .add_child_with_id("Sha1 of index".into(), integrity::ProgressId::ChecksumBytes.into()), should_interrupt, ) - .map_err(Into::into) + .map_err(index::traverse::Error::IndexVerify) .map(|id| integrity::Outcome { actual_index_checksum: id, pack_traverse_statistics: None, diff --git a/gix-pack/src/index/write/error.rs b/gix-pack/src/index/write/error.rs index a5ef6ad67b6..dbdbef717f4 100644 --- a/gix-pack/src/index/write/error.rs +++ b/gix-pack/src/index/write/error.rs @@ -1,11 +1,9 @@ -use std::io; - /// Returned by [`crate::index::File::write_data_iter_to_stream()`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { - #[error("An IO error occurred when reading the pack or creating a temporary file")] - Io(#[from] io::Error), + #[error("An error occurred when writing the pack index file")] + Io(#[from] gix_hash::io::Error), #[error("A pack entry could not be extracted")] PackEntryDecode(#[from] crate::data::input::Error), #[error("Indices of type {} cannot be written, only {} are supported", *.0 as usize, crate::index::Version::default() as usize)] diff --git a/gix-pack/src/index/write/mod.rs b/gix-pack/src/index/write/mod.rs index 0f07bf7a932..2c1c0330c2e 100644 --- a/gix-pack/src/index/write/mod.rs +++ b/gix-pack/src/index/write/mod.rs @@ -180,7 +180,7 @@ impl crate::index::File { root_progress.inc(); - let (resolver, pack) = make_resolver()?; + let (resolver, pack) = make_resolver().map_err(gix_hash::io::Error::from)?; let sorted_pack_offsets_by_oid = { let traverse::Outcome { roots, children } = tree.traverse( resolver, @@ -192,10 +192,7 @@ impl crate::index::File { entry, decompressed: bytes, .. - }| { - modify_base(data, entry, bytes, version.hash()); - Ok::<_, Error>(()) - }, + }| { modify_base(data, entry, bytes, version.hash()) }, traverse::Options { object_progress: Box::new( root_progress.add_child_with_id("Resolving".into(), ProgressId::ResolveObjects.into()), @@ -225,9 +222,9 @@ impl crate::index::File { Some(ph) => ph, None if num_objects == 0 => { let header = crate::data::header::encode(pack_version, 0); - let mut hasher = gix_features::hash::hasher(object_hash); + let mut hasher = gix_hash::hasher(object_hash); hasher.update(&header); - gix_hash::ObjectId::from(hasher.digest()) + hasher.try_finalize().map_err(gix_hash::io::Error::from)? } None => return Err(Error::IteratorInvariantTrailer), }; @@ -253,8 +250,14 @@ impl crate::index::File { } } -fn modify_base(entry: &mut TreeEntry, pack_entry: &crate::data::Entry, decompressed: &[u8], hash: gix_hash::Kind) { +fn modify_base( + entry: &mut TreeEntry, + pack_entry: &crate::data::Entry, + decompressed: &[u8], + hash: gix_hash::Kind, +) -> Result<(), gix_hash::hasher::Error> { let object_kind = pack_entry.header.as_kind().expect("base object as source of iteration"); - let id = gix_object::compute_hash(hash, object_kind, decompressed); + let id = gix_object::compute_hash(hash, object_kind, decompressed)?; entry.id = id; + Ok(()) } diff --git a/gix-pack/src/multi_index/verify.rs b/gix-pack/src/multi_index/verify.rs index fe02d2ebfb5..8ef4c61c2e2 100644 --- a/gix-pack/src/multi_index/verify.rs +++ b/gix-pack/src/multi_index/verify.rs @@ -279,23 +279,14 @@ impl File { use index::traverse::Error::*; match err { Processor(err) => Processor(integrity::Error::IndexIntegrity(err)), - VerifyChecksum(err) => VerifyChecksum(err), + IndexVerify(err) => IndexVerify(err), Tree(err) => Tree(err), TreeTraversal(err) => TreeTraversal(err), + PackVerify(err) => PackVerify(err), PackDecode { id, offset, source } => PackDecode { id, offset, source }, - PackMismatch { expected, actual } => PackMismatch { expected, actual }, + PackMismatch(err) => PackMismatch(err), EntryType(err) => EntryType(err), - PackObjectMismatch { - expected, - actual, - offset, - kind, - } => PackObjectMismatch { - expected, - actual, - offset, - kind, - }, + PackObjectVerify { offset, source } => PackObjectVerify { offset, source }, Crc32Mismatch { expected, actual, diff --git a/gix-pack/src/multi_index/write.rs b/gix-pack/src/multi_index/write.rs index 8054f3571d6..cfcc8bb7d18 100644 --- a/gix-pack/src/multi_index/write.rs +++ b/gix-pack/src/multi_index/write.rs @@ -14,7 +14,7 @@ mod error { #[allow(missing_docs)] pub enum Error { #[error(transparent)] - Io(#[from] std::io::Error), + Io(#[from] gix_hash::io::Error), #[error("Interrupted")] Interrupted, #[error(transparent)] @@ -83,7 +83,7 @@ impl multi_index::File { should_interrupt: &AtomicBool, Options { object_hash }: Options, ) -> Result { - let out = gix_features::hash::Write::new(out, object_hash); + let out = gix_hash::io::Write::new(out, object_hash); let (index_paths_sorted, index_filenames_sorted) = { index_paths.sort(); let file_names = index_paths @@ -181,30 +181,34 @@ impl multi_index::File { cf.num_chunks().try_into().expect("BUG: wrote more than 256 chunks"), index_paths_sorted.len() as u32, object_hash, - )?; + ) + .map_err(gix_hash::io::Error::from)?; { progress.set_name("Writing chunks".into()); progress.init(Some(cf.num_chunks()), gix_features::progress::count("chunks")); - let mut chunk_write = cf.into_write(&mut out, bytes_written)?; + let mut chunk_write = cf + .into_write(&mut out, bytes_written) + .map_err(gix_hash::io::Error::from)?; while let Some(chunk_to_write) = chunk_write.next_chunk() { match chunk_to_write { multi_index::chunk::index_names::ID => { - multi_index::chunk::index_names::write(&index_filenames_sorted, &mut chunk_write)?; + multi_index::chunk::index_names::write(&index_filenames_sorted, &mut chunk_write) } - multi_index::chunk::fanout::ID => multi_index::chunk::fanout::write(&entries, &mut chunk_write)?, - multi_index::chunk::lookup::ID => multi_index::chunk::lookup::write(&entries, &mut chunk_write)?, + multi_index::chunk::fanout::ID => multi_index::chunk::fanout::write(&entries, &mut chunk_write), + multi_index::chunk::lookup::ID => multi_index::chunk::lookup::write(&entries, &mut chunk_write), multi_index::chunk::offsets::ID => { - multi_index::chunk::offsets::write(&entries, num_large_offsets.is_some(), &mut chunk_write)?; + multi_index::chunk::offsets::write(&entries, num_large_offsets.is_some(), &mut chunk_write) } multi_index::chunk::large_offsets::ID => multi_index::chunk::large_offsets::write( &entries, num_large_offsets.expect("available if planned"), &mut chunk_write, - )?, + ), unknown => unreachable!("BUG: forgot to implement chunk {:?}", std::str::from_utf8(&unknown)), } + .map_err(gix_hash::io::Error::from)?; progress.inc(); if should_interrupt.load(Ordering::Relaxed) { return Err(Error::Interrupted); @@ -213,8 +217,11 @@ impl multi_index::File { } // write trailing checksum - let multi_index_checksum: gix_hash::ObjectId = out.inner.hash.digest().into(); - out.inner.inner.write_all(multi_index_checksum.as_slice())?; + let multi_index_checksum = out.inner.hash.try_finalize().map_err(gix_hash::io::Error::from)?; + out.inner + .inner + .write_all(multi_index_checksum.as_slice()) + .map_err(gix_hash::io::Error::from)?; out.progress.show_throughput(write_start); Ok(Outcome { multi_index_checksum }) diff --git a/gix-pack/src/verify.rs b/gix-pack/src/verify.rs index fff99c75d12..8bb6ce6f55a 100644 --- a/gix-pack/src/verify.rs +++ b/gix-pack/src/verify.rs @@ -10,11 +10,10 @@ pub mod checksum { pub enum Error { #[error("Interrupted by user")] Interrupted, - #[error("index checksum mismatch: expected {expected}, got {actual}")] - Mismatch { - expected: gix_hash::ObjectId, - actual: gix_hash::ObjectId, - }, + #[error("Failed to hash data")] + Hasher(#[from] gix_hash::hasher::Error), + #[error(transparent)] + Verify(#[from] gix_hash::verify::Error), } } @@ -26,8 +25,8 @@ pub fn fan(data: &[u32]) -> Option { } /// Calculate the hash of the given kind by trying to read the file from disk at `data_path` or falling back on the mapped content in `data`. -/// `Ok(desired_hash)` or `Err(Some(actual_hash))` is returned if the hash matches or mismatches. -/// If the `Err(None)` is returned, the operation was interrupted. +/// `Ok(expected)` or [`checksum::Error::Verify`] is returned if the hash matches or mismatches. +/// If the [`checksum::Error::Interrupted`] is returned, the operation was interrupted. pub fn checksum_on_disk_or_mmap( data_path: &Path, data: &[u8], @@ -37,7 +36,7 @@ pub fn checksum_on_disk_or_mmap( should_interrupt: &AtomicBool, ) -> Result { let data_len_without_trailer = data.len() - object_hash.len_in_bytes(); - let actual = match gix_features::hash::bytes_of_file( + let actual = match gix_hash::bytes_of_file( data_path, data_len_without_trailer as u64, object_hash, @@ -45,20 +44,20 @@ pub fn checksum_on_disk_or_mmap( should_interrupt, ) { Ok(id) => id, - Err(err) if err.kind() == std::io::ErrorKind::Interrupted => return Err(checksum::Error::Interrupted), - Err(_io_err) => { + Err(gix_hash::io::Error::Io(err)) if err.kind() == std::io::ErrorKind::Interrupted => { + return Err(checksum::Error::Interrupted); + } + Err(gix_hash::io::Error::Io(_io_err)) => { let start = std::time::Instant::now(); - let mut hasher = gix_features::hash::hasher(object_hash); + let mut hasher = gix_hash::hasher(object_hash); hasher.update(&data[..data_len_without_trailer]); progress.inc_by(data_len_without_trailer); progress.show_throughput(start); - gix_hash::ObjectId::from(hasher.digest()) + hasher.try_finalize()? } + Err(gix_hash::io::Error::Hasher(err)) => return Err(checksum::Error::Hasher(err)), }; - if actual == expected { - Ok(actual) - } else { - Err(checksum::Error::Mismatch { actual, expected }) - } + actual.verify(&expected)?; + Ok(actual) } diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index c6edc032d69..57981937ee9 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -359,7 +359,7 @@ impl<'index> State<'_, 'index> { Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => { return Ok(Some(Change::Removed.into())) } - Err(err) => return Err(err.into()), + Err(err) => return Err(Error::Io(err.into())), }; self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); let metadata = match gix_index::fs::Metadata::from_path_no_follow(worktree_path) { @@ -385,7 +385,7 @@ impl<'index> State<'_, 'index> { return Ok(Some(Change::Removed.into())) } Err(err) => { - return Err(err.into()); + return Err(Error::Io(err.into())); } }; if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { @@ -562,8 +562,9 @@ where self.buf.clear(); let platform = self .attr_stack - .at_entry(self.rela_path, Some(self.entry.mode), &self.objects)?; - let file = std::fs::File::open(self.path)?; + .at_entry(self.rela_path, Some(self.entry.mode), &self.objects) + .map_err(gix_hash::io::Error::from)?; + let file = std::fs::File::open(self.path).map_err(gix_hash::io::Error::from)?; let out = self .filter .convert_to_git( @@ -574,7 +575,7 @@ where }, &mut |buf| Ok(self.objects.find_blob(self.id, buf).map(|_| Some(()))?), ) - .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; + .map_err(|err| Error::Io(io::Error::new(io::ErrorKind::Other, err).into()))?; let len = match out { ToGitOutcome::Unchanged(_) => Some(self.file_len), ToGitOutcome::Process(_) | ToGitOutcome::Buffer(_) => None, diff --git a/gix-status/src/index_as_worktree/traits.rs b/gix-status/src/index_as_worktree/traits.rs index e5224f38609..854cedf31f1 100644 --- a/gix-status/src/index_as_worktree/traits.rs +++ b/gix-status/src/index_as_worktree/traits.rs @@ -143,14 +143,16 @@ impl CompareBlobs for HashEq { let mut stream = data.stream_worktree_file()?; match stream.as_bytes() { Some(buffer) => { - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buffer); + let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buffer) + .map_err(gix_hash::io::Error::from)?; Ok((entry.id != file_hash).then_some(file_hash)) } None => { let file_hash = match stream.size() { None => { - stream.read_to_end(buf)?; + stream.read_to_end(buf).map_err(gix_hash::io::Error::from)?; gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buf) + .map_err(gix_hash::io::Error::from)? } Some(len) => gix_object::compute_stream_hash( entry.id.kind(), diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index f38f131e74d..c4ad3d16cb1 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -10,7 +10,7 @@ pub enum Error { #[error("The clock was off when reading file related metadata after updating a file on disk")] Time(#[from] std::time::SystemTimeError), #[error("IO error while writing blob or reading file metadata or changing filetype")] - Io(#[from] std::io::Error), + Io(#[from] gix_hash::io::Error), #[error("Failed to obtain blob from object database")] Find(#[from] gix_object::find::existing_object::Error), #[error("Could not determine status for submodule at '{rela_path}'")] diff --git a/gix-status/src/index_as_worktree_with_renames/mod.rs b/gix-status/src/index_as_worktree_with_renames/mod.rs index 8368102c246..4eaa8f09014 100644 --- a/gix-status/src/index_as_worktree_with_renames/mod.rs +++ b/gix-status/src/index_as_worktree_with_renames/mod.rs @@ -536,11 +536,13 @@ pub(super) mod function { should_interrupt, ) .map_err(Error::HashFile)?, - ToGitOutcome::Buffer(buf) => gix_object::compute_hash(object_hash, gix_object::Kind::Blob, buf), + ToGitOutcome::Buffer(buf) => gix_object::compute_hash(object_hash, gix_object::Kind::Blob, buf) + .map_err(|err| Error::HashFile(err.into()))?, ToGitOutcome::Process(mut stream) => { buf.clear(); - stream.read_to_end(buf).map_err(Error::HashFile)?; + stream.read_to_end(buf).map_err(|err| Error::HashFile(err.into()))?; gix_object::compute_hash(object_hash, gix_object::Kind::Blob, buf) + .map_err(|err| Error::HashFile(err.into()))? } } } @@ -548,6 +550,7 @@ pub(super) mod function { let path = worktree_root.join(gix_path::from_bstr(rela_path)); let target = gix_path::into_bstr(std::fs::read_link(path).map_err(Error::ReadLink)?); gix_object::compute_hash(object_hash, gix_object::Kind::Blob, &target) + .map_err(|err| Error::HashFile(err.into()))? } Kind::Directory | Kind::Repository => object_hash.null(), }) diff --git a/gix-status/src/index_as_worktree_with_renames/types.rs b/gix-status/src/index_as_worktree_with_renames/types.rs index add7717897e..a6fff8980bd 100644 --- a/gix-status/src/index_as_worktree_with_renames/types.rs +++ b/gix-status/src/index_as_worktree_with_renames/types.rs @@ -17,7 +17,7 @@ pub enum Error { #[error("Could not open worktree file for reading")] OpenWorktreeFile(std::io::Error), #[error(transparent)] - HashFile(std::io::Error), + HashFile(gix_hash::io::Error), #[error("Could not read worktree link content")] ReadLink(std::io::Error), #[error(transparent)] diff --git a/gix/Cargo.toml b/gix/Cargo.toml index 5e0a0d2a31a..14445293f98 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -217,7 +217,7 @@ blocking-http-transport-reqwest-native-tls = [ #! #### Performance #! #! The reason these features exist is to allow optimization for compile time and optimize for compatibility by default. This means that some performance options around -#! SHA1 and ZIP might not compile on all platforms, so it depends on the end-user who compiles the application to chose these based on their needs. +#! ZIP might not compile on all platforms, so it depends on the end-user who compiles the application to chose these based on their needs. ## Activate features that maximize performance, like using threads, but leave everything else that might affect compatibility out to allow users more fine-grained ## control over performance features like which `zlib*` implementation to use. @@ -249,11 +249,7 @@ pack-cache-lru-dynamic = ["gix-pack/pack-cache-lru-dynamic"] ## Activate other features that maximize performance, like usage of threads, `zlib-ng` and access to caching in object databases. ## Note that some platforms might suffer from compile failures, which is when `max-performance-safe` should be used. -max-performance = ["max-performance-safe", "zlib-ng", "fast-sha1"] - -## If enabled, use assembly versions of sha1 on supported platforms. -## This might cause compile failures as well which is why it can be turned off separately. -fast-sha1 = ["gix-features/fast-sha1"] +max-performance = ["max-performance-safe", "zlib-ng"] ## Use the C-based zlib-ng backend, which can compress and decompress significantly faster. ## Note that this will cause duplicate symbol errors if the application also depends on `zlib` - use `zlib-ng-compat` in that case. diff --git a/gix/src/repository/impls.rs b/gix/src/repository/impls.rs index 31c149c9494..32f319c8199 100644 --- a/gix/src/repository/impls.rs +++ b/gix/src/repository/impls.rs @@ -106,7 +106,7 @@ impl gix_object::Write for crate::Repository { } fn write_buf(&self, object: gix_object::Kind, from: &[u8]) -> Result { - let oid = gix_object::compute_hash(self.object_hash(), object, from); + let oid = gix_object::compute_hash(self.object_hash(), object, from)?; if self.objects.exists(&oid) { return Ok(oid); } diff --git a/gix/src/repository/object.rs b/gix/src/repository/object.rs index 873e143792b..2528e1c51cd 100644 --- a/gix/src/repository/object.rs +++ b/gix/src/repository/object.rs @@ -172,7 +172,8 @@ impl crate::Repository { } fn write_object_inner(&self, buf: &[u8], kind: gix_object::Kind) -> Result, object::write::Error> { - let oid = gix_object::compute_hash(self.object_hash(), kind, buf); + let oid = gix_object::compute_hash(self.object_hash(), kind, buf) + .map_err(|err| Box::new(err) as Box)?; if self.objects.exists(&oid) { return Ok(oid.attach(self)); } @@ -189,7 +190,8 @@ impl crate::Repository { /// pre-hashing the data, and checking if the object is already present. pub fn write_blob(&self, bytes: impl AsRef<[u8]>) -> Result, object::write::Error> { let bytes = bytes.as_ref(); - let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, bytes); + let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, bytes) + .map_err(|err| Box::new(err) as Box)?; if self.objects.exists(&oid) { return Ok(oid.attach(self)); } @@ -214,7 +216,8 @@ impl crate::Repository { } fn write_blob_stream_inner(&self, buf: &[u8]) -> Result, object::write::Error> { - let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, buf); + let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, buf) + .map_err(|err| Box::new(err) as Box)?; if self.objects.exists(&oid) { return Ok(oid.attach(self)); } diff --git a/justfile b/justfile index da837e97e13..f143bc06abc 100755 --- a/justfile +++ b/justfile @@ -99,8 +99,6 @@ check: cargo check -p gix-features --all-features cargo check -p gix-features --features parallel cargo check -p gix-features --features fs-read-dir - cargo check -p gix-features --features rustsha1 - cargo check -p gix-features --features fast-sha1 cargo check -p gix-features --features progress cargo check -p gix-features --features io-pipe cargo check -p gix-features --features crc32 diff --git a/tests/it/src/commands/git_to_sh.rs b/tests/it/src/commands/git_to_sh.rs index 499a9a5d953..85ef05eec9c 100644 --- a/tests/it/src/commands/git_to_sh.rs +++ b/tests/it/src/commands/git_to_sh.rs @@ -115,7 +115,11 @@ pub(super) mod function { })?; let mapped = crate::commands::copy_royal::remapped(data); ( - gix::objs::compute_hash(repo.object_hash(), gix::object::Kind::Blob, mapped.as_bytes()), + gix::objs::compute_hash( + repo.object_hash(), + gix::object::Kind::Blob, + mapped.as_bytes(), + )?, Cow::Owned(mapped.into()), ) } diff --git a/tests/snapshots/plumbing/no-repo/pack/explode/broken-delete-pack-to-sink-failure b/tests/snapshots/plumbing/no-repo/pack/explode/broken-delete-pack-to-sink-failure index 2123cefd0f0..90237d86cf9 100644 --- a/tests/snapshots/plumbing/no-repo/pack/explode/broken-delete-pack-to-sink-failure +++ b/tests/snapshots/plumbing/no-repo/pack/explode/broken-delete-pack-to-sink-failure @@ -1,5 +1,5 @@ Error: Failed to explode the entire pack - some loose objects may have been created nonetheless Caused by: - 0: Index file, pack file or object verification failed - 1: index checksum mismatch: expected f1cd3cc7bc63a4a2b357a475a58ad49b40355470, got 337fe3b886fc5041a35313887d68feefeae52519 \ No newline at end of file + 0: Failed to verify pack file checksum + 1: Hash was 337fe3b886fc5041a35313887d68feefeae52519, but should have been f1cd3cc7bc63a4a2b357a475a58ad49b40355470 \ No newline at end of file diff --git a/tests/snapshots/plumbing/no-repo/pack/verify/index-failure b/tests/snapshots/plumbing/no-repo/pack/verify/index-failure index b93e9895bb1..3a7ec0aa154 100644 --- a/tests/snapshots/plumbing/no-repo/pack/verify/index-failure +++ b/tests/snapshots/plumbing/no-repo/pack/verify/index-failure @@ -2,5 +2,5 @@ Could not find matching pack file at 'index.pack' - only index file will be veri Error: Verification failure Caused by: - 0: Index file, pack file or object verification failed - 1: index checksum mismatch: expected 0eba66e6b391eb83efc3ec9fc8a3087788911c0a, got fa9a8a630eacc2d3df00aff604bec2451ccbc8ff \ No newline at end of file + 0: Failed to verify index file checksum + 1: Hash was fa9a8a630eacc2d3df00aff604bec2451ccbc8ff, but should have been 0eba66e6b391eb83efc3ec9fc8a3087788911c0a \ No newline at end of file