diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 80819f5d8c9..fb2d9a4dca4 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1 +1,2 @@ github: byron +open_collective: gitoxide diff --git a/Cargo.lock b/Cargo.lock index 030f8c91182..d8aee88db33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1063,9 +1063,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.33" +version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", "libz-ng-sys", @@ -2069,19 +2069,25 @@ dependencies = [ "bstr", "document-features", "gix-command", + "gix-diff", "gix-filter", "gix-fs 0.12.0", "gix-hash 0.15.0", "gix-object 0.45.0", + "gix-odb", "gix-path 0.10.12", "gix-quote 0.4.13", + "gix-revision", + "gix-revwalk 0.16.0", "gix-tempfile 15.0.0", "gix-testtools", "gix-trace 0.1.11", + "gix-utils 0.1.13", "gix-worktree 0.37.0", "imara-diff", "pretty_assertions", "serde", + "termtree", "thiserror", ] diff --git a/crate-status.md b/crate-status.md index 0d75dce40b6..502583127b8 100644 --- a/crate-status.md +++ b/crate-status.md @@ -338,14 +338,21 @@ Check out the [performance discussion][gix-diff-performance] as well. ### gix-merge -* [x] three-way merge analysis of **blobs** with choice of how to resolve conflicts +* [x] three-way content-merge analysis of **blobs** with choice of how to resolve conflicts + - [x] respect git attributes and drivers. 
- [ ] choose how to resolve conflicts on the data-structure - - [ ] produce a new blob based on data-structure containing possible resolutions + - [ ] more efficient handling of paths with `merge=binary` attributes (do not load them into memory) + - [x] produce a new blob based on data-structure containing possible resolutions - [x] `merge` style - [x] `diff3` style - [x] `zdiff` style + - [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change). - [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same) -* [ ] diff-heuristics match Git perfectly +* [x] **tree**-diff-heuristics match Git for its test-cases + - [ ] a way to generate an index with stages + - *currently the data it provides won't generate index entries, and possibly can't be used for it yet* + - [ ] submodule merges (*right now they count as conflicts if they differ*) +* [x] **commits** - with handling of multiple merge bases by recursive merge-base merge * [x] API documentation * [ ] Examples diff --git a/gitoxide-core/src/pack/explode.rs b/gitoxide-core/src/pack/explode.rs index f73a54533ad..9e51cf51a17 100644 --- a/gitoxide-core/src/pack/explode.rs +++ b/gitoxide-core/src/pack/explode.rs @@ -9,7 +9,8 @@ use anyhow::{anyhow, Result}; use gix::{ hash::ObjectId, object, objs, odb, - odb::{loose, pack, Write}, + odb::{loose, pack}, + prelude::Write, NestedProgress, }; @@ -96,8 +97,8 @@ enum OutputWriter { Sink(odb::Sink), } -impl gix::odb::Write for OutputWriter { - fn write_buf(&self, kind: object::Kind, from: &[u8]) -> Result { +impl gix::objs::Write for OutputWriter { + fn write_buf(&self, kind: object::Kind, from: &[u8]) -> Result { match self { OutputWriter::Loose(db) => db.write_buf(kind, from), OutputWriter::Sink(db) => db.write_buf(kind, from), @@ -109,7 +110,7 @@ impl gix::odb::Write for OutputWriter { kind: object::Kind, size: u64, from: &mut dyn 
Read, - ) -> Result { + ) -> Result { match self { OutputWriter::Loose(db) => db.write_stream(kind, size, from), OutputWriter::Sink(db) => db.write_stream(kind, size, from), diff --git a/gitoxide-core/src/repository/diff.rs b/gitoxide-core/src/repository/diff.rs index 18ca60f9e88..0927acbeec6 100644 --- a/gitoxide-core/src/repository/diff.rs +++ b/gitoxide-core/src/repository/diff.rs @@ -1,5 +1,6 @@ use gix::bstr::{BString, ByteSlice}; use gix::objs::tree::EntryMode; +use gix::odb::store::RefreshMode; use gix::prelude::ObjectIdExt; pub fn tree( @@ -9,6 +10,7 @@ pub fn tree( new_treeish: BString, ) -> anyhow::Result<()> { repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); + repo.objects.refresh = RefreshMode::Never; let old_tree_id = repo.rev_parse_single(old_treeish.as_bstr())?; let new_tree_id = repo.rev_parse_single(new_treeish.as_bstr())?; diff --git a/gitoxide-core/src/repository/merge.rs b/gitoxide-core/src/repository/merge/file.rs similarity index 93% rename from gitoxide-core/src/repository/merge.rs rename to gitoxide-core/src/repository/merge/file.rs index 6698dd3dd2f..120eff0135b 100644 --- a/gitoxide-core/src/repository/merge.rs +++ b/gitoxide-core/src/repository/merge/file.rs @@ -1,5 +1,5 @@ use crate::OutputFormat; -use anyhow::{bail, Context}; +use anyhow::{anyhow, bail, Context}; use gix::bstr::BString; use gix::bstr::ByteSlice; use gix::merge::blob::builtin_driver::binary; @@ -83,8 +83,11 @@ pub fn file( other: Some(theirs.as_bstr()), }; let mut buf = repo.empty_reusable_buffer(); - let (pick, resolution) = platform.merge(&mut buf, labels, repo.command_context()?)?; - let buf = platform.buffer_by_pick(pick).unwrap_or(&buf); + let (pick, resolution) = platform.merge(&mut buf, labels, &repo.command_context()?)?; + let buf = platform + .buffer_by_pick(pick) + .map_err(|_| anyhow!("Participating object was too large"))? 
+ .unwrap_or(&buf); out.write_all(buf)?; if resolution == Resolution::Conflict { diff --git a/gitoxide-core/src/repository/merge/mod.rs b/gitoxide-core/src/repository/merge/mod.rs new file mode 100644 index 00000000000..6acb97bba28 --- /dev/null +++ b/gitoxide-core/src/repository/merge/mod.rs @@ -0,0 +1,5 @@ +mod file; +pub use file::file; + +pub mod tree; +pub use tree::function::tree; diff --git a/gitoxide-core/src/repository/merge/tree.rs b/gitoxide-core/src/repository/merge/tree.rs new file mode 100644 index 00000000000..3f497f08a37 --- /dev/null +++ b/gitoxide-core/src/repository/merge/tree.rs @@ -0,0 +1,112 @@ +use crate::OutputFormat; + +pub struct Options { + pub format: OutputFormat, + pub resolve_content_merge: Option, + pub in_memory: bool, +} + +pub(super) mod function { + + use crate::OutputFormat; + use anyhow::{anyhow, bail, Context}; + use gix::bstr::BString; + use gix::bstr::ByteSlice; + use gix::merge::blob::builtin_driver::binary; + use gix::merge::blob::builtin_driver::text::Conflict; + use gix::merge::tree::UnresolvedConflict; + use gix::prelude::Write; + + use super::Options; + + #[allow(clippy::too_many_arguments)] + pub fn tree( + mut repo: gix::Repository, + out: &mut dyn std::io::Write, + err: &mut dyn std::io::Write, + base: BString, + ours: BString, + theirs: BString, + Options { + format, + resolve_content_merge, + in_memory, + }: Options, + ) -> anyhow::Result<()> { + if format != OutputFormat::Human { + bail!("JSON output isn't implemented yet"); + } + repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); + if in_memory { + repo.objects.enable_object_memory(); + } + let (base_ref, base_id) = refname_and_tree(&repo, base)?; + let (ours_ref, ours_id) = refname_and_tree(&repo, ours)?; + let (theirs_ref, theirs_id) = refname_and_tree(&repo, theirs)?; + + let mut options = repo.tree_merge_options()?; + if let Some(resolve) = resolve_content_merge { + options.blob_merge.text.conflict = 
resolve; + options.blob_merge.resolve_binary_with = match resolve { + Conflict::Keep { .. } => None, + Conflict::ResolveWithOurs => Some(binary::ResolveWith::Ours), + Conflict::ResolveWithTheirs => Some(binary::ResolveWith::Theirs), + Conflict::ResolveWithUnion => None, + }; + } + + let base_id_str = base_id.to_string(); + let ours_id_str = ours_id.to_string(); + let theirs_id_str = theirs_id.to_string(); + let labels = gix::merge::blob::builtin_driver::text::Labels { + ancestor: base_ref + .as_ref() + .map_or(base_id_str.as_str().into(), |n| n.as_bstr()) + .into(), + current: ours_ref + .as_ref() + .map_or(ours_id_str.as_str().into(), |n| n.as_bstr()) + .into(), + other: theirs_ref + .as_ref() + .map_or(theirs_id_str.as_str().into(), |n| n.as_bstr()) + .into(), + }; + let mut res = repo.merge_trees(base_id, ours_id, theirs_id, labels, options)?; + { + let _span = gix::trace::detail!("Writing merged tree"); + let mut written = 0; + let tree_id = res + .tree + .write(|tree| { + written += 1; + repo.write(tree) + }) + .map_err(|err| anyhow!("{err}"))?; + writeln!(out, "{tree_id} (wrote {written} trees)")?; + } + + if !res.conflicts.is_empty() { + writeln!(err, "{} possibly resolved conflicts", res.conflicts.len())?; + } + if res.has_unresolved_conflicts(UnresolvedConflict::Renames) { + bail!("Tree conflicted") + } + Ok(()) + } + + fn refname_and_tree( + repo: &gix::Repository, + revspec: BString, + ) -> anyhow::Result<(Option, gix::hash::ObjectId)> { + let spec = repo.rev_parse(revspec.as_bstr())?; + let tree_id = spec + .single() + .context("Expected revspec to expand to a single rev only")? + .object()? + .peel_to_tree()? 
+ .id; + let refname = spec.first_reference().map(|r| r.name.shorten().as_bstr().to_owned()); + Ok((refname, tree_id)) + } +} diff --git a/gix-diff/src/rewrites/mod.rs b/gix-diff/src/rewrites/mod.rs index 08d6f2cce53..f18052b343b 100644 --- a/gix-diff/src/rewrites/mod.rs +++ b/gix-diff/src/rewrites/mod.rs @@ -1,4 +1,6 @@ +use crate::tree::visit::ChangeId; use crate::Rewrites; +use std::collections::BTreeSet; /// Types related to the rename tracker for renames, rewrites and copies. pub mod tracker; @@ -12,6 +14,8 @@ pub struct Tracker { path_backing: Vec, /// How to track copies and/or rewrites. rewrites: Rewrites, + /// Previously emitted relation ids of rewrite pairs, with `(deleted source, added destination)`. + child_renames: BTreeSet<(ChangeId, ChangeId)>, } /// Determine in which set of files to search for copies. diff --git a/gix-diff/src/rewrites/tracker.rs b/gix-diff/src/rewrites/tracker.rs index df166bd43e3..e1416a172f2 100644 --- a/gix-diff/src/rewrites/tracker.rs +++ b/gix-diff/src/rewrites/tracker.rs @@ -10,6 +10,7 @@ use std::ops::Range; use bstr::{BStr, ByteSlice}; use gix_object::tree::{EntryKind, EntryMode}; +use crate::rewrites::tracker::visit::SourceKind; use crate::tree::visit::{Action, ChangeId, Relation}; use crate::{ blob::{platform::prepare_diff::Operation, DiffLineStats, ResourceKind}, @@ -155,6 +156,7 @@ impl Tracker { items: vec![], path_backing: vec![], rewrites, + child_renames: Default::default(), } } } @@ -168,13 +170,13 @@ impl Tracker { return Some(change); }; - let relation = change - .relation() - .filter(|_| matches!(change_kind, ChangeKind::Addition | ChangeKind::Deletion)); let entry_kind = change.entry_mode().kind(); if entry_kind == EntryKind::Commit { return Some(change); } + let relation = change + .relation() + .filter(|_| matches!(change_kind, ChangeKind::Addition | ChangeKind::Deletion)); if let (None, EntryKind::Tree) = (relation, entry_kind) { return Some(change); }; @@ -244,8 +246,9 @@ impl Tracker { }; 
self.items.sort_by(by_id_and_location); - // Rewrites by directory can be pruned out quickly, quickly pruning candidates - // for the following per-item steps. + // Rewrites by directory (without local changes) can be pruned out quickly, + // by finding only parents, their counterpart, and then all children can be matched by + // relationship ID. self.match_pairs_of_kind( visit::SourceKind::Rename, &mut cb, @@ -266,6 +269,8 @@ impl Tracker { None, )?; + self.match_renamed_directories(&mut cb)?; + if let Some(copies) = self.rewrites.copies { self.match_pairs_of_kind( visit::SourceKind::Copy, @@ -387,6 +392,7 @@ impl Tracker { }) { dest_idx += dest_ofs; dest_ofs = dest_idx + 1; + self.items[dest_idx].location(&self.path_backing); let src = find_match( &self.items, dest, @@ -434,11 +440,17 @@ impl Tracker { return Ok(Action::Cancel); } - if let Some((Relation::Parent(src), Relation::Parent(dst))) = relations { - let res = self.emit_child_renames_matching_identity(cb, kind, src, dst)?; - if res == Action::Cancel { - return Ok(Action::Cancel); + match relations { + Some((Relation::Parent(src), Relation::Parent(dst))) => { + let res = self.emit_child_renames_matching_identity(cb, kind, src, dst)?; + if res == Action::Cancel { + return Ok(Action::Cancel); + } + } + Some((Relation::ChildOfParent(src), Relation::ChildOfParent(dst))) => { + self.child_renames.insert((src, dst)); } + _ => {} } } Ok(Action::Continue) @@ -446,6 +458,7 @@ impl Tracker { /// Emit the children of `src_parent_id` and `dst_parent_id` as pairs of exact matches, which are assumed /// as `src` and `dst` were an exact match (so all children have to match exactly). + /// Note that we intentionally do not record them as their parents will be emitted, too. 
fn emit_child_renames_matching_identity( &mut self, cb: &mut impl FnMut(visit::Destination<'_, T>, Option>) -> Action, @@ -504,6 +517,56 @@ impl Tracker { } Ok(Action::Continue) } + + /// Find directories with relation id that haven't been emitted yet and store them for lookup. + /// Then use the previously stored emitted renames with relation id to learn which directories they 'link' + /// and emit them, too. + /// Note that this works whenever top-level directories are renamed because they are always added and deleted, + /// and we only match those. Thus, one rewrite inside the directory is enough. + fn match_renamed_directories( + &mut self, + cb: &mut impl FnMut(visit::Destination<'_, T>, Option>) -> Action, + ) -> Result<(), emit::Error> { + fn unemitted_directory_matching_relation_id(items: &[Item], child_id: ChangeId) -> Option { + items.iter().position(|i| { + !i.emitted && matches!(i.change.relation(), Some(Relation::Parent(pid)) if pid == child_id) + }) + } + for (deleted_child_id, added_child_id) in &self.child_renames { + let Some(src_idx) = unemitted_directory_matching_relation_id(&self.items, *deleted_child_id) else { + continue; + }; + let Some(dst_idx) = unemitted_directory_matching_relation_id(&self.items, *added_child_id) else { + // This could go wrong in case there are mismatches, so be defensive here. + // But generally, we'd expect the destination item to exist. 
+ continue; + }; + + let (src_item, dst_item) = (&self.items[src_idx], &self.items[dst_idx]); + let entry_mode = src_item.change.entry_mode(); + let location = src_item.location(&self.path_backing); + let src = visit::Source { + entry_mode, + id: src_item.change.id().to_owned(), + kind: SourceKind::Rename, + location, + change: &src_item.change, + diff: None, + }; + let location = dst_item.location(&self.path_backing); + let change = dst_item.change.clone(); + let dst = visit::Destination { change, location }; + let res = cb(dst, Some(src)); + + self.items[src_idx].emitted = true; + self.items[dst_idx].emitted = true; + + if res == Action::Cancel { + return Ok(()); + } + } + Ok(()) + } } fn filename(path: &BStr) -> &BStr { @@ -572,8 +635,8 @@ fn find_match<'a, T: Change>( let (item_id, item_mode) = item.change.id_and_entry_mode(); if needs_exact_match(percentage) || item_mode.is_link() { let first_idx = items.partition_point(|a| a.change.id() < item_id); - let range = items.get(first_idx..).map(|items| { - let end = items + let range = items.get(first_idx..).map(|slice| { + let end = slice .iter() .position(|a| a.change.id() != item_id) .map_or(items.len(), |idx| first_idx + idx); diff --git a/gix-diff/src/tree/mod.rs b/gix-diff/src/tree/mod.rs index 7ef2b277a4d..d542b1c1239 100644 --- a/gix-diff/src/tree/mod.rs +++ b/gix-diff/src/tree/mod.rs @@ -36,8 +36,10 @@ pub trait Visit { /// The state required to run [tree-diffs](super::tree()). #[derive(Default, Clone)] pub struct State { - buf1: Vec, - buf2: Vec, + /// A buffer for object data. + pub buf1: Vec, + /// Another buffer for object data. 
+ pub buf2: Vec, trees: VecDeque, change_id: visit::ChangeId, } diff --git a/gix-diff/src/tree_with_rewrites/change.rs b/gix-diff/src/tree_with_rewrites/change.rs index 196a4b977b6..508c1137519 100644 --- a/gix-diff/src/tree_with_rewrites/change.rs +++ b/gix-diff/src/tree_with_rewrites/change.rs @@ -4,7 +4,7 @@ use bstr::BString; use bstr::{BStr, ByteSlice}; /// Represents any possible change in order to turn one tree into another, which references data owned by its producer. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum ChangeRef<'a> { /// An entry was added, like the addition of a file or directory. Addition { @@ -101,7 +101,7 @@ pub enum ChangeRef<'a> { } /// Represents any possible change in order to turn one tree into another, with fully-owned data. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum Change { /// An entry was added, like the addition of a file or directory. Addition { @@ -444,6 +444,17 @@ impl<'a> ChangeRef<'a> { | ChangeRef::Rewrite { location, .. } => location, } } + + /// Return the *previous* location of the resource where possible, i.e. the source of a rename or copy, or the + /// location at which an addition, deletion or modification took place. + pub fn source_location(&self) -> &BStr { + match self { + ChangeRef::Addition { location, .. } + | ChangeRef::Deletion { location, .. } + | ChangeRef::Modification { location, .. } => location, + ChangeRef::Rewrite { source_location, .. } => source_location, + } + } } impl Change { @@ -477,4 +488,15 @@ impl Change { | Change::Rewrite { location, .. } => location.as_bstr(), } } + + /// Return the *previous* location of the resource where possible, i.e. the source of a rename or copy, or the + /// location at which an addition, deletion or modification took place. + pub fn source_location(&self) -> &BStr { + match self { + Change::Addition { location, .. } + | Change::Deletion { location, .. } + | Change::Modification { location, .. 
} => location.as_bstr(), + Change::Rewrite { source_location, .. } => source_location.as_bstr(), + } + } } diff --git a/gix-diff/tests/diff/rewrites/tracker.rs b/gix-diff/tests/diff/rewrites/tracker.rs index fb6da4c5b47..998e9076726 100644 --- a/gix-diff/tests/diff/rewrites/tracker.rs +++ b/gix-diff/tests/diff/rewrites/tracker.rs @@ -525,17 +525,19 @@ fn rename_by_50_percent_similarity() -> crate::Result { #[test] fn directories_without_relation_are_ignored() -> crate::Result { let mut track = util::new_tracker(Default::default()); - let tree_without_relation = Change { - id: NULL_ID, - kind: ChangeKind::Deletion, - mode: EntryKind::Tree.into(), - relation: None, - }; - assert_eq!( - track.try_push_change(tree_without_relation, "dir".into()), - Some(tree_without_relation), - "trees, or non-blobs, are ignored, particularly when they have no relation" - ); + for mode in [EntryKind::Tree, EntryKind::Commit] { + let tree_without_relation = Change { + id: NULL_ID, + kind: ChangeKind::Deletion, + mode: mode.into(), + relation: None, + }; + assert_eq!( + track.try_push_change(tree_without_relation, "dir".into()), + Some(tree_without_relation), + "trees and submodules are ignored, particularly when they have no relation" + ); + } Ok(()) } @@ -586,6 +588,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result { (Change::addition(), "b", "firt\nsecond\n"), ], ); + let mut calls = 0; let out = util::assert_emit_with_objects( &mut track, @@ -598,10 +601,17 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result { assert_eq!(src.location, expected_src); assert_eq!(dst.location, expected_dst); } - 3..=6 => { + 3 => { + assert_eq!(src.unwrap().location, "d"); + assert_eq!( + dst.location, "d-renamed", + "it can now track modified and renamed directories" + ); + } + 4 => { assert_eq!(src, None); - let expected_dst = ["d", "d-renamed", "d/subdir/d"][calls - 3]; - assert_eq!(dst.location, expected_dst); + assert_eq!(dst.change.kind, ChangeKind::Deletion); 
+ assert_eq!(dst.location, "d/subdir/d"); } _ => unreachable!("Should have expected emission call {calls}"), } @@ -618,7 +628,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result { ..Default::default() } ); - assert_eq!(calls, 6, "Should not have too few calls"); + assert_eq!(calls, 5, "Should not have too few calls"); Ok(()) } diff --git a/gix-diff/tests/diff/tree_with_rewrites.rs b/gix-diff/tests/diff/tree_with_rewrites.rs index f0c137b6abe..5f30c26b8fd 100644 --- a/gix-diff/tests/diff/tree_with_rewrites.rs +++ b/gix-diff/tests/diff/tree_with_rewrites.rs @@ -1904,6 +1904,137 @@ rename to gix-sec/tests/sec.rs Ok(()) } +#[test] +fn realistic_renames_3_without_identity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r4-base", + "r4-dir-rename-non-identity", + Options { + location: Some(Location::Path), + rewrites: Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + }), + }, + )?; + + // Look how nicely it captures and associates this directory rename. 
+ insta::assert_debug_snapshot!(changes.into_iter() + .filter(|c| !c.entry_mode().is_tree() || + c.relation().map_or(false, |r| matches!(r, Relation::Parent(_))) + ).collect::>(), @r#" + [ + Rewrite { + source_location: "src/plumbing/options.rs", + source_entry_mode: EntryMode( + 33188, + ), + source_relation: Some( + ChildOfParent( + 2, + ), + ), + source_id: Sha1(00750edc07d6415dcc07ae0351e9397b0222b7ba), + diff: None, + entry_mode: EntryMode( + 33188, + ), + id: Sha1(00750edc07d6415dcc07ae0351e9397b0222b7ba), + location: "src/plumbing-renamed/options/mod.rs", + relation: Some( + ChildOfParent( + 1, + ), + ), + copy: false, + }, + Rewrite { + source_location: "src/plumbing/mod.rs", + source_entry_mode: EntryMode( + 33188, + ), + source_relation: Some( + ChildOfParent( + 2, + ), + ), + source_id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + diff: None, + entry_mode: EntryMode( + 33188, + ), + id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + location: "src/plumbing-renamed/mod.rs", + relation: Some( + ChildOfParent( + 1, + ), + ), + copy: false, + }, + Rewrite { + source_location: "src/plumbing/main.rs", + source_entry_mode: EntryMode( + 33188, + ), + source_relation: Some( + ChildOfParent( + 2, + ), + ), + source_id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + diff: None, + entry_mode: EntryMode( + 33188, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + location: "src/plumbing-renamed/main.rs", + relation: Some( + ChildOfParent( + 1, + ), + ), + copy: false, + }, + Rewrite { + source_location: "src/plumbing", + source_entry_mode: EntryMode( + 16384, + ), + source_relation: Some( + Parent( + 2, + ), + ), + source_id: Sha1(b9d41dcdbd92fcab2fb6594d04f2ad99b3472621), + diff: None, + entry_mode: EntryMode( + 16384, + ), + id: Sha1(202702465d7bb291153629dc2e8b353afe9cbdae), + location: "src/plumbing-renamed", + relation: Some( + Parent( + 1, + ), + ), + copy: false, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + 
out.num_similarity_checks, 0, + "similarity checks disabled, and not necessary" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + mod util { use gix_diff::rewrites; use gix_object::{FindExt, TreeRefIter}; diff --git a/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar b/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar index ecc1fadbcc9..878a4113ef7 100644 Binary files a/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar and b/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar differ diff --git a/gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh b/gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh index 5d63c067de6..a03188ed553 100644 --- a/gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh +++ b/gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh @@ -794,4 +794,16 @@ git -c diff.renames=1 show HEAD~2 > baseline-2.with-renames git -c diff.renames=0 show HEAD~4 > baseline.no-renames git -c diff.renames=1 show HEAD~4 > baseline.with-renames +echo 1 >src/plumbing/main.rs +echo 2 >src/plumbing/mod.rs +echo 3 >src/plumbing/options.rs +git add src/plumbing && git commit -m "r4-base" +store_tree "r4-base" + +mkdir src/plumbing/options +git mv src/plumbing/options.rs src/plumbing/options/mod.rs +git mv src/plumbing src/plumbing-renamed +git commit -m "r4-dir-rename-non-identity" +store_tree "r4-dir-rename-non-identity" + mv ../*.tree . 
\ No newline at end of file diff --git a/gix-discover/src/lib.rs b/gix-discover/src/lib.rs index df0ca849489..939928f829a 100644 --- a/gix-discover/src/lib.rs +++ b/gix-discover/src/lib.rs @@ -39,7 +39,7 @@ pub mod is_git { Metadata { source: std::io::Error, path: PathBuf }, #[error("The repository's config file doesn't exist or didn't have a 'bare' configuration or contained core.worktree without value")] Inconclusive, - #[error("Could not obtain current directory when conforming repository path")] + #[error("Could not obtain current directory for resolving the '.' repository path")] CurrentDir(#[from] std::io::Error), } } diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index 92a6fb1c2d5..1070e979516 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -15,26 +15,26 @@ workspace = true doctest = false [features] -default = ["blob"] -## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation. -blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-quote"] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] [dependencies] gix-hash = { version = "^0.15.0", path = "../gix-hash" } gix-object = { version = "^0.45.0", path = "../gix-object" } -gix-filter = { version = "^0.14.0", path = "../gix-filter", optional = true } -gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true } -gix-command = { version = "^0.3.10", path = "../gix-command", optional = true } -gix-path = { version = "^0.10.12", path = "../gix-path", optional = true } -gix-fs = { version = "^0.12.0", path = "../gix-fs", optional = true } -gix-tempfile = { version = "^15.0.0", path = "../gix-tempfile", optional = true } -gix-trace = { version = "^0.1.11", path = "../gix-trace", optional = true } -gix-quote = { version = "^0.4.13", path = "../gix-quote", optional = true } +gix-filter = { version = "^0.14.0", path = "../gix-filter" } +gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } +gix-command = { version = "^0.3.10", path = "../gix-command" } +gix-path = { version = "^0.10.12", path = "../gix-path" } +gix-fs = { version = "^0.12.0", path = "../gix-fs" } +gix-tempfile = { version = "^15.0.0", path = "../gix-tempfile" } +gix-trace = { version = "^0.1.11", path = "../gix-trace" } +gix-quote = { version = "^0.4.13", path = "../gix-quote" } +gix-revision = { version = "^0.30.0", path = "../gix-revision", default-features = false, features = ["merge_base"] } +gix-revwalk = { version = "^0.16.0", path = "../gix-revwalk" } +gix-diff = { version = "^0.47.0", path = "../gix-diff", default-features = false, features = ["blob"] } thiserror = "1.0.63" -imara-diff = { version = "0.1.7", optional = true } +imara-diff = { version = "0.1.7" } bstr = { version = "1.5.0", default-features = false } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } @@ -42,6 +42,9 @@ 
document-features = { version = "0.2.0", optional = true } [dev-dependencies] gix-testtools = { path = "../tests/tools" } +gix-odb = { path = "../gix-odb" } +gix-utils = { version = "^0.1.12", path = "../gix-utils" } +termtree = "0.5.1" pretty_assertions = "1.4.0" [package.metadata.docs.rs] diff --git a/gix-merge/fuzz/.gitignore b/gix-merge/fuzz/.gitignore new file mode 100644 index 00000000000..94e6cc53131 --- /dev/null +++ b/gix-merge/fuzz/.gitignore @@ -0,0 +1,7 @@ +target +corpus +artifacts + +# These usually involve a lot of local CPU time, keep them. +$artifacts +$corpus diff --git a/gix-merge/fuzz/Cargo.toml b/gix-merge/fuzz/Cargo.toml new file mode 100644 index 00000000000..881b91cb8fd --- /dev/null +++ b/gix-merge/fuzz/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "gix-merge-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +anyhow = "1.0.76" +libfuzzer-sys = "0.4" +arbitrary = { version = "1.3.2", features = ["derive"] } +imara-diff = { version = "0.1.7" } +gix-merge = { path = ".." 
} + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "blob" +path = "fuzz_targets/blob.rs" +test = false +doc = false diff --git a/gix-merge/fuzz/fuzz_targets/blob.rs b/gix-merge/fuzz/fuzz_targets/blob.rs new file mode 100644 index 00000000000..e96979a3b2a --- /dev/null +++ b/gix-merge/fuzz/fuzz_targets/blob.rs @@ -0,0 +1,80 @@ +#![no_main] +use anyhow::Result; +use arbitrary::Arbitrary; +use gix_merge::blob::builtin_driver::text::{Conflict, ConflictStyle, Options}; +use gix_merge::blob::Resolution; +use libfuzzer_sys::fuzz_target; +use std::hint::black_box; +use std::num::NonZero; + +fn fuzz_text_merge( + Ctx { + base, + ours, + theirs, + marker_size, + }: Ctx, +) -> Result<()> { + let mut buf = Vec::new(); + let mut input = imara_diff::intern::InternedInput::default(); + for diff_algorithm in [ + imara_diff::Algorithm::Histogram, + imara_diff::Algorithm::Myers, + imara_diff::Algorithm::MyersMinimal, + ] { + let mut opts = Options { + diff_algorithm, + conflict: Default::default(), + }; + for (left, right) in [(ours, theirs), (theirs, ours)] { + let resolution = gix_merge::blob::builtin_driver::text( + &mut buf, + &mut input, + Default::default(), + left, + base, + right, + opts, + ); + if resolution == Resolution::Conflict { + for conflict in [ + Conflict::ResolveWithOurs, + Conflict::ResolveWithTheirs, + Conflict::ResolveWithUnion, + Conflict::Keep { + style: ConflictStyle::Diff3, + marker_size, + }, + Conflict::Keep { + style: ConflictStyle::ZealousDiff3, + marker_size, + }, + ] { + opts.conflict = conflict; + gix_merge::blob::builtin_driver::text( + &mut buf, + &mut input, + Default::default(), + left, + base, + right, + opts, + ); + } + } + } + } + Ok(()) +} + +#[derive(Debug, Arbitrary)] +struct Ctx<'a> { + base: &'a [u8], + ours: &'a [u8], + theirs: &'a [u8], + marker_size: NonZero, +} + +fuzz_target!(|ctx: Ctx<'_>| { + _ = black_box(fuzz_text_merge(ctx)); +}); diff --git 
a/gix-merge/src/blob/builtin_driver/text/function.rs b/gix-merge/src/blob/builtin_driver/text/function.rs index bb800ce47e2..2b0a2e7522c 100644 --- a/gix-merge/src/blob/builtin_driver/text/function.rs +++ b/gix-merge/src/blob/builtin_driver/text/function.rs @@ -5,6 +5,7 @@ use crate::blob::builtin_driver::text::utils::{ }; use crate::blob::builtin_driver::text::{Conflict, ConflictStyle, Labels, Options}; use crate::blob::Resolution; +use std::ops::Range; /// Merge `current` and `other` with `ancestor` as base according to `opts`. /// @@ -36,7 +37,7 @@ pub fn merge<'a>( input.update_before(tokens(ancestor)); input.update_after(tokens(current)); - let current_hunks = imara_diff::diff( + let hunks = imara_diff::diff( opts.diff_algorithm, input, CollectHunks { @@ -53,187 +54,197 @@ pub fn merge<'a>( input, CollectHunks { side: Side::Other, - hunks: current_hunks, + hunks, }, ); + if hunks.is_empty() { + write_ancestor(input, 0, input.before.len(), out); + return Resolution::Complete; + } + hunks.sort_by(|a, b| a.before.start.cmp(&b.before.start)); let mut hunks = hunks.into_iter().peekable(); let mut intersecting = Vec::new(); let mut ancestor_integrated_until = 0; let mut resolution = Resolution::Complete; - let mut filled_hunks = Vec::with_capacity(2); - while let Some(hunk) = hunks.next() { - if take_intersecting(&hunk, &mut hunks, &mut intersecting) { - fill_ancestor(&hunk.before, &mut intersecting); + let mut current_hunks = Vec::with_capacity(2); + while take_intersecting(&mut hunks, &mut current_hunks, &mut intersecting).is_some() { + if intersecting.is_empty() { + let hunk = current_hunks.pop().expect("always pushed during intersection check"); + write_ancestor(input, ancestor_integrated_until, hunk.before.start as usize, out); + ancestor_integrated_until = hunk.before.end; + write_hunks(std::slice::from_ref(&hunk), input, ¤t_tokens, out); + continue; + } - let filled_hunks_side = hunk.side; - filled_hunks.clear(); - filled_hunks.push(hunk); - fill_ancestor( - 
&intersecting - .first() - .zip(intersecting.last()) - .map(|(f, l)| f.before.start..l.before.end) - .expect("at least one entry"), - &mut filled_hunks, - ); - match opts.conflict { - Conflict::Keep { style, marker_size } => { - let (hunks_front_and_back, num_hunks_front) = match style { - ConflictStyle::Merge | ConflictStyle::ZealousDiff3 => { - zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens) - } - ConflictStyle::Diff3 => (Vec::new(), 0), - }; - let (our_hunks, their_hunks) = match filled_hunks_side { - Side::Current => (&filled_hunks, &intersecting), - Side::Other => (&intersecting, &filled_hunks), - Side::Ancestor => { - unreachable!("initial hunks are never ancestors") + let filled_hunks_side = current_hunks.first().expect("at least one hunk").side; + { + let filled_hunks_range = before_range_from_hunks(¤t_hunks); + let intersecting_range = before_range_from_hunks(&intersecting); + let extended_range = filled_hunks_range.start..intersecting_range.end.max(filled_hunks_range.end); + fill_ancestor(&extended_range, &mut current_hunks); + fill_ancestor(&extended_range, &mut intersecting); + } + match opts.conflict { + Conflict::Keep { style, marker_size } => { + let marker_size = marker_size.get(); + let (hunks_front_and_back, num_hunks_front) = match style { + ConflictStyle::Merge | ConflictStyle::ZealousDiff3 => { + zealously_contract_hunks(&mut current_hunks, &mut intersecting, input, ¤t_tokens) + } + ConflictStyle::Diff3 => (Vec::new(), 0), + }; + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (¤t_hunks, &intersecting), + Side::Other => (&intersecting, ¤t_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = first_hunk(front_hunks, our_hunks, their_hunks, back_hunks); + let last_hunk = last_hunk(front_hunks, our_hunks, their_hunks, back_hunks); + 
write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + write_hunks(front_hunks, input, ¤t_tokens, out); + if their_hunks.is_empty() { + write_hunks(our_hunks, input, ¤t_tokens, out); + } else if our_hunks.is_empty() { + write_hunks(their_hunks, input, ¤t_tokens, out); + } else { + // DEVIATION: this makes tests (mostly) pass, but probably is very different from what Git does. + let hunk_storage; + let nl = detect_line_ending( + if front_hunks.is_empty() { + hunk_storage = Hunk { + before: ancestor_integrated_until..first_hunk.before.start, + after: Default::default(), + side: Side::Ancestor, + }; + std::slice::from_ref(&hunk_storage) + } else { + front_hunks + }, + input, + ¤t_tokens, + ) + .or_else(|| detect_line_ending(our_hunks, input, ¤t_tokens)) + .unwrap_or(b"\n".into()); + match style { + ConflictStyle::Merge => { + if contains_lines(our_hunks) || contains_lines(their_hunks) { + resolution = Resolution::Conflict; + write_conflict_marker(out, b'<', current_label, marker_size, nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, marker_size, nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, marker_size, nl); + } } - }; - let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); - let first_hunk = front_hunks - .first() - .or(our_hunks.first()) - .expect("at least one hunk to write"); - let last_hunk = back_hunks - .last() - .or(their_hunks.last()) - .or(our_hunks.last()) - .or(front_hunks.last()) - .expect("at least one hunk"); - write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); - - write_hunks(front_hunks, input, ¤t_tokens, out); - if their_hunks.is_empty() { - write_hunks(our_hunks, input, ¤t_tokens, out); - } else if our_hunks.is_empty() { - // TODO: assure we run into this - currently no test triggers this. Can this happen at all? 
- write_hunks(their_hunks, input, ¤t_tokens, out); - } else { - // DEVIATION: this makes tests (mostly) pass, but probably is very different from what Git does. - let hunk_storage; - let nl = detect_line_ending( - if front_hunks.is_empty() { - hunk_storage = Hunk { - before: ancestor_integrated_until..first_hunk.before.start, - after: Default::default(), - side: Side::Ancestor, - }; - std::slice::from_ref(&hunk_storage) - } else { - front_hunks - }, - input, - ¤t_tokens, - ) - .or_else(|| detect_line_ending(our_hunks, input, ¤t_tokens)) - .unwrap_or(b"\n".into()); - match style { - ConflictStyle::Merge => { - if contains_lines(our_hunks) || contains_lines(their_hunks) { + ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => { + if contains_lines(our_hunks) || contains_lines(their_hunks) { + if hunks_differ_in_diff3(style, our_hunks, their_hunks, input, ¤t_tokens) { resolution = Resolution::Conflict; write_conflict_marker(out, b'<', current_label, marker_size, nl); write_hunks(our_hunks, input, ¤t_tokens, out); + let ancestor_hunk = Hunk { + before: first_hunk.before.start..last_hunk.before.end, + after: Default::default(), + side: Side::Ancestor, + }; + let ancestor_hunk = std::slice::from_ref(&ancestor_hunk); + let ancestor_nl = detect_line_ending_or_nl(ancestor_hunk, input, ¤t_tokens); + write_conflict_marker(out, b'|', ancestor_label, marker_size, ancestor_nl); + write_hunks(ancestor_hunk, input, ¤t_tokens, out); write_conflict_marker(out, b'=', None, marker_size, nl); write_hunks(their_hunks, input, ¤t_tokens, out); write_conflict_marker(out, b'>', other_label, marker_size, nl); - } - } - ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => { - if contains_lines(our_hunks) || contains_lines(their_hunks) { - if hunks_differ_in_diff3(style, our_hunks, their_hunks, input, ¤t_tokens) { - resolution = Resolution::Conflict; - write_conflict_marker(out, b'<', current_label, marker_size, nl); - write_hunks(our_hunks, input, ¤t_tokens, out); - let ancestor_hunk = Hunk 
{ - before: first_hunk.before.start..last_hunk.before.end, - after: Default::default(), - side: Side::Ancestor, - }; - let ancestor_hunk = std::slice::from_ref(&ancestor_hunk); - let ancestor_nl = - detect_line_ending_or_nl(ancestor_hunk, input, ¤t_tokens); - write_conflict_marker(out, b'|', ancestor_label, marker_size, ancestor_nl); - write_hunks(ancestor_hunk, input, ¤t_tokens, out); - write_conflict_marker(out, b'=', None, marker_size, nl); - write_hunks(their_hunks, input, ¤t_tokens, out); - write_conflict_marker(out, b'>', other_label, marker_size, nl); - } else { - write_hunks(our_hunks, input, ¤t_tokens, out); - } + } else { + write_hunks(our_hunks, input, ¤t_tokens, out); } } } } - write_hunks(back_hunks, input, ¤t_tokens, out); - ancestor_integrated_until = last_hunk.before.end; } - Conflict::ResolveWithOurs | Conflict::ResolveWithTheirs => { - let (our_hunks, their_hunks) = match filled_hunks_side { - Side::Current => (&filled_hunks, &intersecting), - Side::Other => (&intersecting, &filled_hunks), - Side::Ancestor => { - unreachable!("initial hunks are never ancestors") - } - }; - let hunks_to_write = if opts.conflict == Conflict::ResolveWithOurs { - our_hunks - } else { - their_hunks - }; - if let Some(first_hunk) = hunks_to_write.first() { - write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); - } - write_hunks(hunks_to_write, input, ¤t_tokens, out); - if let Some(last_hunk) = hunks_to_write.last() { - ancestor_integrated_until = last_hunk.before.end; + write_hunks(back_hunks, input, ¤t_tokens, out); + ancestor_integrated_until = last_hunk.before.end; + } + Conflict::ResolveWithOurs | Conflict::ResolveWithTheirs => { + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (¤t_hunks, &intersecting), + Side::Other => (&intersecting, ¤t_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") } + }; + let hunks_to_write = if opts.conflict == Conflict::ResolveWithOurs { + 
our_hunks + } else { + their_hunks + }; + if let Some(first_hunk) = hunks_to_write.first() { + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + } + write_hunks(hunks_to_write, input, ¤t_tokens, out); + if let Some(last_hunk) = hunks_to_write.last() { + ancestor_integrated_until = last_hunk.before.end; } - Conflict::ResolveWithUnion => { - let (hunks_front_and_back, num_hunks_front) = - zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens); + } + Conflict::ResolveWithUnion => { + let (hunks_front_and_back, num_hunks_front) = + zealously_contract_hunks(&mut current_hunks, &mut intersecting, input, ¤t_tokens); - let (our_hunks, their_hunks) = match filled_hunks_side { - Side::Current => (&filled_hunks, &intersecting), - Side::Other => (&intersecting, &filled_hunks), - Side::Ancestor => { - unreachable!("initial hunks are never ancestors") - } - }; - let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); - let first_hunk = front_hunks - .first() - .or(our_hunks.first()) - .expect("at least one hunk to write"); - write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); - write_hunks(front_hunks, input, ¤t_tokens, out); - assure_ends_with_nl(out, detect_line_ending_or_nl(front_hunks, input, ¤t_tokens)); - write_hunks(our_hunks, input, ¤t_tokens, out); - assure_ends_with_nl(out, detect_line_ending_or_nl(our_hunks, input, ¤t_tokens)); - write_hunks(their_hunks, input, ¤t_tokens, out); - if !back_hunks.is_empty() { - assure_ends_with_nl(out, detect_line_ending_or_nl(their_hunks, input, ¤t_tokens)); + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (¤t_hunks, &intersecting), + Side::Other => (&intersecting, ¤t_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") } - write_hunks(back_hunks, input, ¤t_tokens, out); - let last_hunk = back_hunks - .last() - .or(their_hunks.last()) - 
.or(our_hunks.last()) - .or(front_hunks.last()) - .expect("at least one hunk"); - ancestor_integrated_until = last_hunk.before.end; + }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = first_hunk(front_hunks, our_hunks, their_hunks, back_hunks); + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + write_hunks(front_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending_or_nl(front_hunks, input, ¤t_tokens)); + write_hunks(our_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending_or_nl(our_hunks, input, ¤t_tokens)); + write_hunks(their_hunks, input, ¤t_tokens, out); + if !back_hunks.is_empty() { + assure_ends_with_nl(out, detect_line_ending_or_nl(their_hunks, input, ¤t_tokens)); } + write_hunks(back_hunks, input, ¤t_tokens, out); + let last_hunk = last_hunk(front_hunks, our_hunks, their_hunks, back_hunks); + ancestor_integrated_until = last_hunk.before.end; } - } else { - write_ancestor(input, ancestor_integrated_until, hunk.before.start as usize, out); - ancestor_integrated_until = hunk.before.end; - write_hunks(std::slice::from_ref(&hunk), input, ¤t_tokens, out); } } write_ancestor(input, ancestor_integrated_until, input.before.len(), out); resolution } + +fn first_hunk<'a>(front: &'a [Hunk], ours: &'a [Hunk], theirs: &'a [Hunk], back: &'a [Hunk]) -> &'a Hunk { + front + .first() + .or(ours.first()) + .or(theirs.first()) + .or(back.first()) + .expect("at least one hunk - we aborted if there are none anywhere") +} + +/// Note that last-hunk could be [`first_hunk()`], so the hunk must only be used accordingly. 
+fn last_hunk<'a>(front: &'a [Hunk], ours: &'a [Hunk], theirs: &'a [Hunk], back: &'a [Hunk]) -> &'a Hunk { + back.last() + .or(theirs.last()) + .or(ours.last()) + .or(front.last()) + .expect("at least one hunk - we aborted if there are none anywhere") +} + +fn before_range_from_hunks(hunks: &[Hunk]) -> Range { + hunks + .first() + .zip(hunks.last()) + .map(|(f, l)| f.before.start..l.before.end) + .expect("at least one entry") +} diff --git a/gix-merge/src/blob/builtin_driver/text/mod.rs b/gix-merge/src/blob/builtin_driver/text/mod.rs index 6c5a432782d..a3b38048abd 100644 --- a/gix-merge/src/blob/builtin_driver/text/mod.rs +++ b/gix-merge/src/blob/builtin_driver/text/mod.rs @@ -1,4 +1,5 @@ use bstr::BStr; +use std::num::NonZeroU8; /// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express /// merge conflicts in the resulting file. @@ -87,7 +88,7 @@ pub enum Conflict { /// How to visualize conflicts in merged files. style: ConflictStyle, /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` - marker_size: usize, + marker_size: NonZeroU8, }, /// Chose our side to resolve a conflict. ResolveWithOurs, @@ -99,12 +100,13 @@ pub enum Conflict { impl Conflict { /// The amount of conflict marker characters to print by default. - pub const DEFAULT_MARKER_SIZE: usize = 7; + // TODO: use NonZeroU8::new().unwrap() here once the MSRV supports it. + pub const DEFAULT_MARKER_SIZE: u8 = 7; /// The amount of conflict markers to print if this instance contains them, or `None` otherwise - pub fn marker_size(&self) -> Option { + pub fn marker_size(&self) -> Option { match self { - Conflict::Keep { marker_size, .. } => Some(*marker_size), + Conflict::Keep { marker_size, .. 
} => Some(marker_size.get()), Conflict::ResolveWithOurs | Conflict::ResolveWithTheirs | Conflict::ResolveWithUnion => None, } } @@ -114,7 +116,7 @@ impl Default for Conflict { fn default() -> Self { Conflict::Keep { style: Default::default(), - marker_size: Conflict::DEFAULT_MARKER_SIZE, + marker_size: Conflict::DEFAULT_MARKER_SIZE.try_into().unwrap(), } } } diff --git a/gix-merge/src/blob/builtin_driver/text/utils.rs b/gix-merge/src/blob/builtin_driver/text/utils.rs index 1aab3e47f08..877f2693145 100644 --- a/gix-merge/src/blob/builtin_driver/text/utils.rs +++ b/gix-merge/src/blob/builtin_driver/text/utils.rs @@ -93,9 +93,9 @@ pub fn assure_ends_with_nl(out: &mut Vec, nl: &BStr) { } } -pub fn write_conflict_marker(out: &mut Vec, marker: u8, label: Option<&BStr>, marker_size: usize, nl: &BStr) { +pub fn write_conflict_marker(out: &mut Vec, marker: u8, label: Option<&BStr>, marker_size: u8, nl: &BStr) { assure_ends_with_nl(out, nl); - out.extend(std::iter::repeat(marker).take(marker_size)); + out.extend(std::iter::repeat(marker).take(marker_size as usize)); if let Some(label) = label { out.push(b' '); out.extend_from_slice(label); @@ -132,8 +132,8 @@ pub fn fill_ancestor(Range { start, end }: &Range, in_out: &mut Vec) for (idx, next_idx) in (first_idx..in_out.len()).map(|idx| (idx, idx + 1)) { let Some(next_hunk) = in_out.get(next_idx) else { break }; let hunk = &in_out[idx]; - if let Some(lines_to_add) = next_hunk.after.start.checked_sub(hunk.after.end).filter(is_nonzero) { - in_out.push(ancestor_hunk(hunk.after.end, lines_to_add)); + if let Some(lines_to_add) = next_hunk.before.start.checked_sub(hunk.before.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(hunk.before.end, lines_to_add)); added_hunks = true; } } @@ -414,21 +414,46 @@ fn write_tokens( } /// Find all hunks in `iter` which aren't from the same side as `hunk` and intersect with it. -/// Return `true` if `out` is non-empty after the operation, indicating overlapping hunks were found. 
-pub fn take_intersecting(hunk: &Hunk, iter: &mut Peekable>, out: &mut Vec) -> bool { - out.clear(); - while iter - .peek() - .filter(|b_hunk| { - b_hunk.side != hunk.side - && (hunk.before.contains(&b_hunk.before.start) - || (hunk.before.is_empty() && hunk.before.start == b_hunk.before.start)) - }) - .is_some() - { - out.extend(iter.next()); +/// Also put `hunk` into `input` so it's the first item, and possibly put more hunks of the side of `hunk` so +/// `iter` doesn't have any overlapping hunks left. +/// Return `true` if `intersecting` is non-empty after the operation, indicating overlapping hunks were found. +pub fn take_intersecting( + iter: &mut Peekable>, + input: &mut Vec, + intersecting: &mut Vec, +) -> Option<()> { + input.clear(); + input.push(iter.next()?); + intersecting.clear(); + + fn left_overlaps_right(left: &Hunk, right: &Hunk) -> bool { + left.side != right.side + && (right.before.contains(&left.before.start) + || (right.before.is_empty() && right.before.start == left.before.start)) + } + + loop { + let hunk = input.last().expect("just pushed"); + while iter.peek().filter(|b_hunk| left_overlaps_right(b_hunk, hunk)).is_some() { + intersecting.extend(iter.next()); + } + // The hunks that overlap might themselves overlap with a following hunk of the other side. + // If so, split it so it doesn't overlap anymore. 
+ let mut found_more_intersections = false; + while intersecting + .last_mut() + .zip(iter.peek_mut()) + .filter(|(last_intersecting, candidate)| left_overlaps_right(candidate, last_intersecting)) + .is_some() + { + input.extend(iter.next()); + found_more_intersections = true; + } + if !found_more_intersections { + break; + } } - !out.is_empty() + Some(()) } pub fn tokens(input: &[u8]) -> imara_diff::sources::ByteLines<'_, true> { diff --git a/gix-merge/src/blob/platform/merge.rs b/gix-merge/src/blob/platform/merge.rs index cd803006268..460c24ad85b 100644 --- a/gix-merge/src/blob/platform/merge.rs +++ b/gix-merge/src/blob/platform/merge.rs @@ -7,6 +7,7 @@ use std::path::PathBuf; pub struct Options { /// If `true`, the resources being merged are contained in a virtual ancestor, /// which is the case when merge bases are merged into one. + /// This flag affects the choice of merge drivers. pub is_virtual_ancestor: bool, /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side. pub resolve_binary_with: Option, @@ -18,8 +19,6 @@ pub struct Options { #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { - #[error("At least one resource was too large to be processed")] - ResourceTooLarge, #[error(transparent)] PrepareExternalDriver(#[from] inner::prepare_external_driver::Error), #[error("Failed to launch external merge driver: {cmd}")] @@ -268,6 +267,8 @@ pub(super) mod inner { /// pub mod builtin_merge { + use crate::blob::platform::resource; + use crate::blob::platform::resource::Data; use crate::blob::{builtin_driver, BuiltinDriver, PlatformRef, Resolution}; /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge). @@ -299,24 +300,29 @@ pub(super) mod inner { /// Returns `None` if one of the buffers is too large, making a merge impossible. 
/// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared, /// and one has to take the data from the respective resource. + /// + /// If there is no buffer loaded as the resource is too big, we will automatically perform a binary merge + /// which effectively chooses our side by default. pub fn builtin_merge( &self, driver: BuiltinDriver, out: &mut Vec, input: &mut imara_diff::intern::InternedInput<&'parent [u8]>, labels: builtin_driver::text::Labels<'_>, - ) -> Option<(Pick, Resolution)> { - let base = self.ancestor.data.as_slice()?; - let ours = self.current.data.as_slice()?; - let theirs = self.other.data.as_slice()?; + ) -> (Pick, Resolution) { + let base = self.ancestor.data.as_slice().unwrap_or_default(); + let ours = self.current.data.as_slice().unwrap_or_default(); + let theirs = self.other.data.as_slice().unwrap_or_default(); let driver = if driver != BuiltinDriver::Binary - && (is_binary_buf(ours) || is_binary_buf(theirs) || is_binary_buf(base)) + && (is_binary_buf(self.ancestor.data) + || is_binary_buf(self.other.data) + || is_binary_buf(self.current.data)) { BuiltinDriver::Binary } else { driver }; - Some(match driver { + match driver { BuiltinDriver::Text => { let resolution = builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text); @@ -346,13 +352,19 @@ pub(super) mod inner { ); (Pick::Buffer, resolution) } - }) + } } } - fn is_binary_buf(buf: &[u8]) -> bool { - let buf = &buf[..buf.len().min(8000)]; - buf.contains(&0) + fn is_binary_buf(data: resource::Data<'_>) -> bool { + match data { + Data::Missing => false, + Data::Buffer(buf) => { + let buf = &buf[..buf.len().min(8000)]; + buf.contains(&0) + } + Data::TooLarge { .. 
} => true, + } } } } @@ -374,15 +386,11 @@ impl<'parent> PlatformRef<'parent> { &self, out: &mut Vec, labels: builtin_driver::text::Labels<'_>, - context: gix_command::Context, + context: &gix_command::Context, ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> { - let _span = gix_trace::coarse!( - "gix_merge::blob::PlatformRef::merge()", - current_rela_path = %self.current.rela_path - ); match self.configured_driver() { Ok(driver) => { - let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context)?; + let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context.clone())?; let status = cmd.status().map_err(|err| Error::SpawnExternalDriver { cmd: format!("{:?}", cmd.cmd), source: err, @@ -400,22 +408,52 @@ impl<'parent> PlatformRef<'parent> { Err(builtin) => { let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]); out.clear(); - let (pick, resolution) = self - .builtin_merge(builtin, out, &mut input, labels) - .ok_or(Error::ResourceTooLarge)?; + let (pick, resolution) = self.builtin_merge(builtin, out, &mut input, labels); Ok((pick, resolution)) } } } /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying. - /// Return `None` if the buffer is too large, or if the `pick` corresponds to a buffer (that was written separately). - pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Option<&'parent [u8]> { + /// Return `Ok(None)` if the `pick` corresponds to a buffer (that was written separately). + /// Return `Err(())` if the buffer is *too large*, so it was never read. 
+ #[allow(clippy::result_unit_err)] + pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result, ()> { match pick { - inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice(), - inner::builtin_merge::Pick::Ours => self.current.data.as_slice(), - inner::builtin_merge::Pick::Theirs => self.other.data.as_slice(), - inner::builtin_merge::Pick::Buffer => None, + inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice().map(Some).ok_or(()), + inner::builtin_merge::Pick::Ours => self.current.data.as_slice().map(Some).ok_or(()), + inner::builtin_merge::Pick::Theirs => self.other.data.as_slice().map(Some).ok_or(()), + inner::builtin_merge::Pick::Buffer => Ok(None), + } + } + + /// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge). + /// In case of binary or large files, this will simply be the existing ID of the resource. + /// In case of resources available in the object DB for binary merges, the object ID will be returned. + /// If new content was produced due to a content merge, `buf` will be written out + /// to the object database using `write_blob`. + /// Beware that the returned ID could be `Ok(None)` if the underlying resource was loaded + /// from the worktree *and* was too large so it was never loaded from disk. + /// `Ok(None)` will also be returned if one of the resources was missing. + /// `write_blob()` is used to turn buffers. + pub fn id_by_pick( + &self, + pick: inner::builtin_merge::Pick, + buf: &[u8], + mut write_blob: impl FnMut(&[u8]) -> Result, + ) -> Result, E> { + let field = match pick { + inner::builtin_merge::Pick::Ancestor => &self.ancestor, + inner::builtin_merge::Pick::Ours => &self.current, + inner::builtin_merge::Pick::Theirs => &self.other, + inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some), + }; + use crate::blob::platform::resource::Data; + match field.data { + Data::TooLarge { .. 
} | Data::Missing if !field.id.is_null() => Ok(Some(field.id.to_owned())), + Data::TooLarge { .. } | Data::Missing => Ok(None), + Data::Buffer(buf) if field.id.is_null() => write_blob(buf).map(Some), + Data::Buffer(_) => Ok(Some(field.id.to_owned())), } } } diff --git a/gix-merge/src/blob/platform/mod.rs b/gix-merge/src/blob/platform/mod.rs index 14b33d03fd5..aa51102288e 100644 --- a/gix-merge/src/blob/platform/mod.rs +++ b/gix-merge/src/blob/platform/mod.rs @@ -95,7 +95,7 @@ impl Platform { attr_stack, attrs: { let mut out = attributes::search::Outcome::default(); - out.initialize_with_selection(&Default::default(), Some("merge")); + out.initialize_with_selection(&Default::default(), ["merge", "conflict-marker-size"]); out }, options, diff --git a/gix-merge/src/blob/platform/prepare_merge.rs b/gix-merge/src/blob/platform/prepare_merge.rs index 24ffb5af32e..310a5896637 100644 --- a/gix-merge/src/blob/platform/prepare_merge.rs +++ b/gix-merge/src/blob/platform/prepare_merge.rs @@ -1,7 +1,10 @@ +use crate::blob::builtin_driver::text::Conflict; use crate::blob::platform::{merge, DriverChoice, ResourceRef}; use crate::blob::{BuiltinDriver, Platform, PlatformRef, ResourceKind}; use bstr::{BStr, BString, ByteSlice}; use gix_filter::attributes; +use std::num::NonZeroU8; +use std::str::FromStr; /// The error returned by [Platform::prepare_merge_state()](Platform::prepare_merge()). 
#[derive(Debug, thiserror::Error)] @@ -45,12 +48,14 @@ impl Platform { rela_path: current.rela_path.clone(), })?; entry.matching_attributes(&mut self.attrs); - let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); - let mut driver = match attr.assignment.state { + let mut attrs = self.attrs.iter_selected(); + let merge_attr = attrs.next().expect("pre-initialized with 'merge'"); + let marker_size_attr = attrs.next().expect("pre-initialized with 'conflict-marker-size'"); + let mut driver = match merge_attr.assignment.state { attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text), attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary), attributes::StateRef::Value(_) | attributes::StateRef::Unspecified => { - let name = match attr.assignment.state { + let name = match merge_attr.assignment.state { attributes::StateRef::Value(name) => Some(name.as_bstr()), attributes::StateRef::Unspecified => { self.options.default_driver.as_ref().map(|name| name.as_bstr()) @@ -60,6 +65,17 @@ impl Platform { self.find_driver_by_name(name) } }; + if let attributes::StateRef::Value(value) = marker_size_attr.assignment.state { + if let Some(value) = u8::from_str(value.as_bstr().to_str_lossy().as_ref()) + .ok() + .and_then(NonZeroU8::new) + { + match &mut options.text.conflict { + Conflict::Keep { marker_size, .. 
} => *marker_size = value, + Conflict::ResolveWithOurs | Conflict::ResolveWithTheirs | Conflict::ResolveWithUnion => {} + } + } + } if let Some(recursive_driver_name) = match driver { DriverChoice::Index(idx) => self.drivers.get(idx), _ => None, diff --git a/gix-merge/src/blob/platform/set_resource.rs b/gix-merge/src/blob/platform/set_resource.rs index c3dae8ffd4d..75be7b65e07 100644 --- a/gix-merge/src/blob/platform/set_resource.rs +++ b/gix-merge/src/blob/platform/set_resource.rs @@ -34,7 +34,7 @@ impl Platform { /// If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided /// for completeness. Note that it's not expected to be in `objects` if `rela_path` is set and a worktree-root /// is available for `kind`. - /// * `mode` is the kind of object (only blobs and links are allowed) + /// * `mode` is the kind of object (only blobs and executables are allowed) /// * `rela_path` is the relative path as seen from the (work)tree root. /// * `kind` identifies the side of the merge this resource will be used for. /// * `objects` provides access to the object database in case the resource can't be read from a worktree. @@ -46,7 +46,6 @@ impl Platform { kind: ResourceKind, objects: &impl gix_object::FindObjectOrHeader, ) -> Result<(), Error> { - gix_trace::detail!("gix_merge::blob::Platform::set_resource()", %id, %rela_path); if !matches!( mode, gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable diff --git a/gix-merge/src/commit.rs b/gix-merge/src/commit.rs new file mode 100644 index 00000000000..94097ec7004 --- /dev/null +++ b/gix-merge/src/commit.rs @@ -0,0 +1,227 @@ +/// The error returned by [`commit()`](crate::commit()). 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + MergeBase(#[from] gix_revision::merge_base::Error), + #[error(transparent)] + MergeTree(#[from] crate::tree::Error), + #[error("Failed to write tree for merged merge-base or virtual commit")] + WriteObject(gix_object::write::Error), + #[error("No common ancestor between {our_commit_id} and {their_commit_id}")] + NoMergeBase { + /// The commit on our side that was to be merged. + our_commit_id: gix_hash::ObjectId, + /// The commit on their side that was to be merged. + their_commit_id: gix_hash::ObjectId, + }, + #[error( + "Conflicts occurred when trying to resolve multiple merge-bases by merging them. This is most certainly a bug." + )] + VirtualMergeBaseConflict, + #[error("Could not find ancestor, our or their commit to extract tree from")] + FindCommit(#[from] gix_object::find::existing_object::Error), +} + +/// A way to configure [`commit()`](crate::commit()). +#[derive(Default, Debug, Clone)] +pub struct Options { + /// If `true`, merging unrelated commits is allowed, with the merge-base being assumed as empty tree. + pub allow_missing_merge_base: bool, + /// Options to define how trees should be merged. + pub tree_merge: crate::tree::Options, + /// If `true`, do not merge multiple merge-bases into one. Instead, just use the first one. + // TODO: test + #[doc(alias = "no_recursive", alias = "git2")] + pub use_first_merge_base: bool, +} + +/// The result of [`commit()`](crate::commit()). +#[derive(Clone)] +pub struct Outcome<'a> { + /// The outcome of the actual tree-merge. + pub tree_merge: crate::tree::Outcome<'a>, + /// The tree id of the base commit we used. This is either… + /// * the single merge-base we found + /// * the first of multiple merge-bases if [`use_first_merge_base`](Options::use_first_merge_base) was `true`. + /// * the merged tree of all merge-bases, which then isn't linked to an actual commit. 
+ /// * an empty tree, if [`allow_missing_merge_base`](Options::allow_missing_merge_base) is enabled. + pub merge_base_tree_id: gix_hash::ObjectId, + /// The object ids of all the commits which were found to be merge-bases, or `None` if there was no merge-base. + pub merge_bases: Option>, + /// A list of virtual commits that were created to merge multiple merge-bases into one. + /// As they are not reachable by anything they will be garbage collected, but knowing them provides options. + pub virtual_merge_bases: Vec, +} + +pub(super) mod function { + use crate::blob::builtin_driver; + use crate::commit::{Error, Options}; + use crate::tree::UnresolvedConflict; + use gix_object::FindExt; + use std::borrow::Cow; + + /// Like [`tree()`](crate::tree()), but it takes only two commits, `our_commit` and `their_commit` to automatically + /// compute the merge-bases among them. + /// If there are multiple merge bases, these will be auto-merged into one, recursively, if + /// [`allow_missing_merge_base`](Options::allow_missing_merge_base) is `true`. + /// + /// `labels` are names where [`current`](crate::blob::builtin_driver::text::Labels::current) is a name for `our_commit` + /// and [`other`](crate::blob::builtin_driver::text::Labels::other) is a name for `their_commit`. + /// If [`ancestor`](crate::blob::builtin_driver::text::Labels::ancestor) is unset, it will be set by us based on the + /// merge-bases of `our_commit` and `their_commit`. + /// + /// The `graph` is used to find the merge-base between `our_commit` and `their_commit`, and can also act as cache + /// to speed up subsequent merge-base queries. + /// + /// Use `abbreviate_hash(id)` to shorten the given `id` according to standard git shortening rules. It's used in case + /// the ancestor-label isn't explicitly set so that the merge base label becomes the shortened `id`. + /// Note that it's a dyn closure only to make it possible to recursively call this function in case of multiple merge-bases. 
+ /// + /// `write_object` is used only if it's allowed to merge multiple merge-bases into one, and if there + /// are multiple merge bases, and to write merged buffers as blobs. + /// + /// ### Performance + /// + /// Note that `objects` *should* have an object cache to greatly accelerate tree-retrieval. + /// + /// ### Notes + /// + /// When merging merge-bases recursively, the options are adjusted automatically to act like Git, i.e. merge binary + /// blobs and resolve with *ours*, while resorting to using the base/ancestor in case of unresolvable conflicts. + /// + /// ### Deviation + /// + /// * It's known that certain conflicts around symbolic links can be auto-resolved. We don't have an option for this + /// at all, yet, primarily as Git seems to not implement the *ours*/*theirs* choice in other places even though it + /// reasonably could. So we leave it to the caller to continue processing the returned tree at will. + #[allow(clippy::too_many_arguments)] + pub fn commit<'objects>( + our_commit: gix_hash::ObjectId, + their_commit: gix_hash::ObjectId, + labels: builtin_driver::text::Labels<'_>, + graph: &mut gix_revwalk::Graph<'_, '_, gix_revwalk::graph::Commit>, + diff_resource_cache: &mut gix_diff::blob::Platform, + blob_merge: &mut crate::blob::Platform, + objects: &'objects (impl gix_object::FindObjectOrHeader + gix_object::Write), + abbreviate_hash: &mut dyn FnMut(&gix_hash::oid) -> String, + options: Options, + ) -> Result, Error> { + let merge_bases = gix_revision::merge_base(our_commit, &[their_commit], graph)?; + let mut virtual_merge_bases = Vec::new(); + let mut state = gix_diff::tree::State::default(); + let mut commit_to_tree = + |commit_id: gix_hash::ObjectId| objects.find_commit(&commit_id, &mut state.buf1).map(|c| c.tree()); + + let (merge_base_tree_id, ancestor_name): (_, Cow<'_, str>) = match merge_bases.clone() { + Some(base_commit) if base_commit.len() == 1 => { + (commit_to_tree(base_commit[0])?, abbreviate_hash(&base_commit[0]).into()) 
+ } + Some(mut base_commits) => { + let virtual_base_tree = if options.use_first_merge_base { + let first = *base_commits.first().expect("if Some() there is at least one."); + commit_to_tree(first)? + } else { + let mut merged_commit_id = base_commits.pop().expect("at least one base"); + let mut options = options.clone(); + options.tree_merge.allow_lossy_resolution = true; + options.tree_merge.blob_merge.is_virtual_ancestor = true; + options.tree_merge.blob_merge.text.conflict = builtin_driver::text::Conflict::ResolveWithOurs; + let favor_ancestor = Some(builtin_driver::binary::ResolveWith::Ancestor); + options.tree_merge.blob_merge.resolve_binary_with = favor_ancestor; + options.tree_merge.symlink_conflicts = favor_ancestor; + let labels = builtin_driver::text::Labels { + current: Some("Temporary merge branch 1".into()), + other: Some("Temporary merge branch 2".into()), + ..labels + }; + while let Some(next_commit_id) = base_commits.pop() { + options.tree_merge.marker_size_multiplier += 1; + let mut out = commit( + merged_commit_id, + next_commit_id, + labels, + graph, + diff_resource_cache, + blob_merge, + objects, + abbreviate_hash, + options.clone(), + )?; + // This shouldn't happen, but if for some buggy reason it does, we rather bail. + if out + .tree_merge + .has_unresolved_conflicts(UnresolvedConflict::ConflictMarkers) + { + return Err(Error::VirtualMergeBaseConflict); + } + let merged_tree_id = out + .tree_merge + .tree + .write(|tree| objects.write(tree)) + .map_err(Error::WriteObject)?; + + merged_commit_id = + create_virtual_commit(objects, merged_commit_id, next_commit_id, merged_tree_id)?; + + virtual_merge_bases.extend(out.virtual_merge_bases); + virtual_merge_bases.push(merged_commit_id); + } + commit_to_tree(merged_commit_id)? 
+ }; + (virtual_base_tree, "merged common ancestors".into()) + } + None => { + if options.allow_missing_merge_base { + (gix_hash::ObjectId::empty_tree(our_commit.kind()), "empty tree".into()) + } else { + return Err(Error::NoMergeBase { + our_commit_id: our_commit, + their_commit_id: their_commit, + }); + } + } + }; + + let mut labels = labels; // TODO(borrowchk): this re-assignment shouldn't be needed. + if labels.ancestor.is_none() { + labels.ancestor = Some(ancestor_name.as_ref().into()); + } + + let our_tree_id = objects.find_commit(&our_commit, &mut state.buf1)?.tree(); + let their_tree_id = objects.find_commit(&their_commit, &mut state.buf1)?.tree(); + + let outcome = crate::tree( + &merge_base_tree_id, + &our_tree_id, + &their_tree_id, + labels, + objects, + |buf| objects.write_buf(gix_object::Kind::Blob, buf), + &mut state, + diff_resource_cache, + blob_merge, + options.tree_merge, + )?; + + Ok(super::Outcome { + tree_merge: outcome, + merge_bases, + merge_base_tree_id, + virtual_merge_bases, + }) + } + + fn create_virtual_commit( + objects: &(impl gix_object::Find + gix_object::Write), + parent_a: gix_hash::ObjectId, + parent_b: gix_hash::ObjectId, + tree_id: gix_hash::ObjectId, + ) -> Result { + let mut buf = Vec::new(); + let mut commit: gix_object::Commit = objects.find_commit(&parent_a, &mut buf)?.into(); + commit.parents = vec![parent_a, parent_b].into(); + commit.tree = tree_id; + objects.write(&commit).map_err(Error::WriteObject) + } +} diff --git a/gix-merge/src/lib.rs b/gix-merge/src/lib.rs index 8e608c53ab4..18be9a67604 100644 --- a/gix-merge/src/lib.rs +++ b/gix-merge/src/lib.rs @@ -2,5 +2,10 @@ #![forbid(unsafe_code)] /// -#[cfg(feature = "blob")] pub mod blob; +/// +pub mod commit; +pub use commit::function::commit; +/// +pub mod tree; +pub use tree::function::tree; diff --git a/gix-merge/src/tree/function.rs b/gix-merge/src/tree/function.rs new file mode 100644 index 00000000000..5cffd377cdd --- /dev/null +++ b/gix-merge/src/tree/function.rs 
@@ -0,0 +1,1091 @@ +use crate::tree::utils::{ + apply_change, perform_blob_merge, possibly_rewritten_location, rewrite_location_with_renamed_directory, + to_components, track, unique_path_in_tree, ChangeList, ChangeListRef, PossibleConflict, TrackedChange, TreeNodes, +}; +use crate::tree::ConflictMapping::{Original, Swapped}; +use crate::tree::{ + Conflict, ConflictMapping, ContentMerge, Error, Options, Outcome, Resolution, ResolutionFailure, UnresolvedConflict, +}; +use bstr::{BString, ByteSlice}; +use gix_diff::tree::recorder::Location; +use gix_diff::tree_with_rewrites::Change; +use gix_hash::ObjectId; +use gix_object::tree::{EntryKind, EntryMode}; +use gix_object::{tree, FindExt}; +use std::convert::Infallible; + +/// Perform a merge between `our_tree` and `their_tree`, using `base_tree` as merge-base. +/// Note that `base_tree` can be an empty tree to indicate 'no common ancestor between the two sides'. +/// +/// * `labels` are relevant for text-merges and will be shown in conflicts. +/// * `objects` provides access to trees when diffing them. +/// * `write_blob_to_odb(content) -> Result` writes newly merged content into the odb to obtain an id +/// that will be used in merged trees. +/// * `diff_state` is state used for diffing trees. +/// * `diff_resource_cache` is used for similarity checks. +/// * `blob_merge` is a pre-configured platform to merge any content. +/// - Note that it shouldn't be allowed to read from the worktree, given that this is a tree-merge. +/// * `options` are used to affect how the merge is performed. +/// +/// ### Unbiased (Ours x Theirs == Theirs x Ours) +/// +/// The algorithm is implemented so that the result is the same no matter how the sides are ordered. +/// +/// ### Differences to Merge-ORT +/// +/// Merge-ORT (Git) defines the desired outcomes where are merely mimicked here. The algorithms are different, and it's +/// clear that Merge-ORT is significantly more elaborate and general. 
+/// +/// It also writes out trees once it's done with them in a form of reduction process, here an editor is used +/// to keep only the changes, to be written by the caller who receives it as part of the result. +/// This may use more memory in the worst case scenario, but in average *shouldn't* perform much worse due to the +/// natural sparsity of the editor. +/// +/// Our rename-tracking also produces copy information, but we discard it and simply treat it like an addition. +/// +/// Finally, our algorithm will consider reasonable solutions to merge-conflicts as conflicts that are resolved, leaving +/// only content with conflict markers as unresolved ones. +/// +/// ### Performance +/// +/// Note that `objects` *should* have an object cache to greatly accelerate tree-retrieval. +#[allow(clippy::too_many_arguments)] +pub fn tree<'objects, E>( + base_tree: &gix_hash::oid, + our_tree: &gix_hash::oid, + their_tree: &gix_hash::oid, + mut labels: crate::blob::builtin_driver::text::Labels<'_>, + objects: &'objects impl gix_object::FindObjectOrHeader, + mut write_blob_to_odb: impl FnMut(&[u8]) -> Result, + diff_state: &mut gix_diff::tree::State, + diff_resource_cache: &mut gix_diff::blob::Platform, + blob_merge: &mut crate::blob::Platform, + options: Options, +) -> Result, Error> +where + E: Into>, +{ + let ours_needs_diff = base_tree != our_tree; + let theirs_needs_diff = base_tree != their_tree; + let _span = gix_trace::coarse!("gix_merge::tree", ?base_tree, ?our_tree, ?their_tree, ?labels); + let (mut base_buf, mut side_buf) = (Vec::new(), Vec::new()); + let ancestor_tree = objects.find_tree(base_tree, &mut base_buf)?; + let allow_resolution_failure = !options.allow_lossy_resolution; + + let mut editor = tree::Editor::new(ancestor_tree.to_owned(), objects, base_tree.kind()); + let ancestor_tree = gix_object::TreeRefIter::from_bytes(&base_buf); + + let mut our_changes = Vec::new(); + if ours_needs_diff { + let our_tree = objects.find_tree_iter(our_tree, &mut 
side_buf)?; + gix_diff::tree_with_rewrites( + ancestor_tree, + our_tree, + diff_resource_cache, + diff_state, + objects, + |change| -> Result<_, Infallible> { + track(change, &mut our_changes); + Ok(gix_diff::tree_with_rewrites::Action::Continue) + }, + gix_diff::tree_with_rewrites::Options { + location: Some(Location::Path), + rewrites: options.rewrites, + }, + )?; + } + + let mut our_tree = TreeNodes::new(); + for (idx, change) in our_changes.iter().enumerate() { + our_tree.track_change(&change.inner, idx); + } + + let mut their_changes = Vec::new(); + if theirs_needs_diff { + let their_tree = objects.find_tree_iter(their_tree, &mut side_buf)?; + gix_diff::tree_with_rewrites( + ancestor_tree, + their_tree, + diff_resource_cache, + diff_state, + objects, + |change| -> Result<_, Infallible> { + track(change, &mut their_changes); + Ok(gix_diff::tree_with_rewrites::Action::Continue) + }, + gix_diff::tree_with_rewrites::Options { + location: Some(Location::Path), + rewrites: options.rewrites, + }, + )?; + } + + let mut their_tree = TreeNodes::new(); + for (idx, change) in their_changes.iter().enumerate() { + their_tree.track_change(&change.inner, idx); + } + + let mut conflicts = Vec::new(); + let mut failed_on_first_conflict = false; + let mut should_fail_on_conflict = |conflict: Conflict| -> bool { + if let Some(how) = options.fail_on_conflict { + if conflict.resolution.is_err() || is_unresolved(std::slice::from_ref(&conflict), how) { + failed_on_first_conflict = true; + } + } + conflicts.push(conflict); + failed_on_first_conflict + }; + + let ((mut our_changes, mut our_tree), (mut their_changes, mut their_tree)) = + ((&mut our_changes, &mut our_tree), (&mut their_changes, &mut their_tree)); + let mut outer_side = Original; + if their_changes.is_empty() { + ((our_changes, our_tree), (their_changes, their_tree)) = ((their_changes, their_tree), (our_changes, our_tree)); + (labels.current, labels.other) = (labels.other, labels.current); + outer_side = 
outer_side.swapped(); + } + + #[derive(Debug)] + enum MatchKind { + /// A tree is supposed to be superseded by something else. + EraseTree, + /// A leaf node is superseded by a tree + EraseLeaf, + } + + 'outer: while their_changes.iter().rev().any(|c| !c.was_written) { + let mut segment_start = 0; + let mut last_seen_len = their_changes.len(); + + while segment_start != last_seen_len { + for theirs_idx in segment_start..last_seen_len { + // `their` can be a tree, and it could be used to efficiently prune child-changes as these + // trees are always rewrites with parent ids (of course we validate), so child-changes could be handled + // quickly. However, for now the benefit of having these trees is to have them as part of the match-tree + // on *our* side so that it's clear that we passed a renamed directory (by identity). + let TrackedChange { + inner: theirs, + was_written, + needs_tree_insertion, + rewritten_location, + } = &their_changes[theirs_idx]; + if theirs.entry_mode().is_tree() || *was_written { + continue; + } + + if needs_tree_insertion.is_some() { + their_tree.insert(theirs, theirs_idx); + } + + match our_tree + .check_conflict( + rewritten_location + .as_ref() + .map_or_else(|| theirs.source_location(), |t| t.0.as_bstr()), + ) + .filter(|ours| { + ours.change_idx() + .zip(needs_tree_insertion.flatten()) + .map_or(true, |(ours_idx, ignore_idx)| ours_idx != ignore_idx) + && our_tree.is_not_same_change_in_possible_conflict(theirs, ours, our_changes) + }) { + None => { + if let Some((rewritten_location, ours_idx)) = rewritten_location { + if should_fail_on_conflict(Conflict::with_resolution( + Resolution::SourceLocationAffectedByRename { + final_location: rewritten_location.to_owned(), + }, + (&our_changes[*ours_idx].inner, theirs, Original, outer_side), + )) { + break 'outer; + }; + editor.remove(to_components(theirs.location()))?; + } + apply_change(&mut editor, theirs, rewritten_location.as_ref().map(|t| &t.0))?; + their_changes[theirs_idx].was_written 
= true; + } + Some(candidate) => { + use crate::tree::utils::to_components_bstring_ref as toc; + debug_assert!( + rewritten_location.is_none(), + "We should probably handle the case where a rewritten location is passed down here" + ); + + let (ours_idx, match_kind) = match candidate { + PossibleConflict::PassedRewrittenDirectory { change_idx } => { + let ours = &our_changes[change_idx]; + let location_after_passed_rename = + rewrite_location_with_renamed_directory(theirs.location(), &ours.inner); + if let Some(new_location) = location_after_passed_rename { + their_tree.remove_existing_leaf(theirs.location()); + push_deferred_with_rewrite( + (theirs.clone(), Some(change_idx)), + Some((new_location, change_idx)), + their_changes, + ); + } else { + apply_change(&mut editor, theirs, None)?; + their_changes[theirs_idx].was_written = true; + } + their_changes[theirs_idx].was_written = true; + continue; + } + PossibleConflict::TreeToNonTree { change_idx: Some(idx) } + if matches!( + our_changes[idx].inner, + Change::Deletion { .. } | Change::Addition { .. } + ) => + { + (Some(idx), Some(MatchKind::EraseTree)) + } + PossibleConflict::NonTreeToTree { change_idx } => (change_idx, Some(MatchKind::EraseLeaf)), + PossibleConflict::Match { change_idx: ours_idx } => (Some(ours_idx), None), + _ => (None, None), + }; + + let Some(ours_idx) = ours_idx else { + let ours = match candidate { + PossibleConflict::TreeToNonTree { change_idx, .. } + | PossibleConflict::NonTreeToTree { change_idx, .. } => change_idx, + PossibleConflict::Match { change_idx } + | PossibleConflict::PassedRewrittenDirectory { change_idx } => Some(change_idx), + } + .map(|idx| &our_changes[idx]); + + if let Some(ours) = ours { + gix_trace::debug!("Turning a case we could probably handle into a conflict for now. 
theirs: {theirs:#?} ours: {ours:#?} kind: {match_kind:?}"); + if allow_resolution_failure + && should_fail_on_conflict(Conflict::without_resolution( + ResolutionFailure::Unknown, + (&ours.inner, theirs, Original, outer_side), + )) + { + break 'outer; + }; + continue; + } else { + gix_trace::debug!("Couldn't figure out how to handle {match_kind:?} theirs: {theirs:#?} candidate: {candidate:#?}"); + continue; + } + }; + + let ours = &our_changes[ours_idx].inner; + debug_assert!( + match_kind.is_none() + || (ours.location() == theirs.location() + || ours.source_location() == theirs.source_location()), + "BUG: right now it's not known to be possible to match changes from different paths: {match_kind:?} {candidate:?}" + ); + match (ours, theirs) { + ( + Change::Modification { + previous_id, + previous_entry_mode, + id: our_id, + location: our_location, + entry_mode: our_mode, + .. + }, + Change::Rewrite { + source_id: their_source_id, + id: their_id, + location: their_location, + entry_mode: their_mode, + source_location, + .. + }, + ) + | ( + Change::Rewrite { + source_id: their_source_id, + id: their_id, + location: their_location, + entry_mode: their_mode, + source_location, + .. + }, + Change::Modification { + previous_id, + previous_entry_mode, + id: our_id, + location: our_location, + entry_mode: our_mode, + .. + }, + ) => { + let side = if matches!(ours, Change::Modification { .. 
}) { + Original + } else { + Swapped + }; + if let Some(merged_mode) = merge_modes(*our_mode, *their_mode) { + assert_eq!( + previous_id, their_source_id, + "both refer to the same base, so should always match" + ); + let their_rewritten_location = possibly_rewritten_location( + pick_our_tree(side, our_tree, their_tree), + their_location.as_ref(), + pick_our_changes(side, our_changes, their_changes), + ); + let renamed_without_change = their_source_id == their_id; + let (our_id, resolution) = if renamed_without_change { + (*our_id, None) + } else { + let (our_location, our_id, our_mode, their_location, their_id, their_mode) = + match side { + Original => ( + our_location, + our_id, + our_mode, + their_location, + their_id, + their_mode, + ), + Swapped => ( + their_location, + their_id, + their_mode, + our_location, + our_id, + our_mode, + ), + }; + let (merged_blob_id, resolution) = perform_blob_merge( + labels, + objects, + blob_merge, + &mut diff_state.buf1, + &mut write_blob_to_odb, + (our_location, *our_id, *our_mode), + (their_location, *their_id, *their_mode), + (source_location, *previous_id, *previous_entry_mode), + (0, outer_side), + &options, + )?; + (merged_blob_id, Some(resolution)) + }; + + editor.remove(toc(our_location))?; + pick_our_tree(side, our_tree, their_tree) + .remove_existing_leaf(our_location.as_bstr()); + let final_location = their_rewritten_location.clone(); + let new_change = Change::Addition { + location: their_rewritten_location.unwrap_or_else(|| their_location.to_owned()), + relation: None, + entry_mode: merged_mode, + id: our_id, + }; + if should_fail_on_conflict(Conflict::with_resolution( + Resolution::OursModifiedTheirsRenamedAndChangedThenRename { + merged_mode: (merged_mode != *their_mode).then_some(merged_mode), + merged_blob: resolution.map(|resolution| ContentMerge { + resolution, + merged_blob_id: our_id, + }), + final_location, + }, + (ours, theirs, side, outer_side), + )) { + break 'outer; + } + + // The other side gets the 
addition, not our side. + push_deferred( + (new_change, None), + pick_our_changes_mut(side, their_changes, our_changes), + ); + } else if allow_resolution_failure { + editor.upsert(toc(our_location), our_mode.kind(), *our_id)?; + editor.upsert(toc(their_location), their_mode.kind(), *their_id)?; + + if should_fail_on_conflict(Conflict::without_resolution( + ResolutionFailure::OursModifiedTheirsRenamedTypeMismatch, + (ours, theirs, side, outer_side), + )) { + break 'outer; + } + } + } + ( + Change::Modification { + location, + previous_id, + previous_entry_mode, + entry_mode: our_mode, + id: our_id, + .. + }, + Change::Modification { + entry_mode: their_mode, + id: their_id, + .. + }, + ) if !involves_submodule(our_mode, their_mode) + && our_mode.kind() == their_mode.kind() + && our_id != their_id => + { + let (merged_blob_id, resolution) = perform_blob_merge( + labels, + objects, + blob_merge, + &mut diff_state.buf1, + &mut write_blob_to_odb, + (location, *our_id, *our_mode), + (location, *their_id, *their_mode), + (location, *previous_id, *previous_entry_mode), + (0, outer_side), + &options, + )?; + editor.upsert(toc(location), our_mode.kind(), merged_blob_id)?; + if should_fail_on_conflict(Conflict::with_resolution( + Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { + merged_blob: ContentMerge { + resolution, + merged_blob_id, + }, + }, + (ours, theirs, Original, outer_side), + )) { + break 'outer; + }; + } + ( + Change::Addition { + location, + entry_mode: our_mode, + id: our_id, + .. + }, + Change::Addition { + entry_mode: their_mode, + id: their_id, + .. 
+ }, + ) if !involves_submodule(our_mode, their_mode) && our_id != their_id => { + let conflict = if let Some(merged_mode) = merge_modes(*our_mode, *their_mode) { + let side = if our_mode == their_mode || matches!(our_mode.kind(), EntryKind::Blob) { + outer_side + } else { + outer_side.swapped() + }; + let (merged_blob_id, resolution) = perform_blob_merge( + labels, + objects, + blob_merge, + &mut diff_state.buf1, + &mut write_blob_to_odb, + (location, *our_id, merged_mode), + (location, *their_id, merged_mode), + (location, their_id.kind().null(), merged_mode), + (0, side), + &options, + )?; + editor.upsert(toc(location), merged_mode.kind(), merged_blob_id)?; + Some(Conflict::with_resolution( + Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { + merged_blob: ContentMerge { + resolution, + merged_blob_id, + }, + }, + (ours, theirs, Original, outer_side), + )) + } else if allow_resolution_failure { + // Actually this has a preference, as symlinks are always left in place with the other side renamed. 
+ let ( + logical_side, + label_of_side_to_be_moved, + (our_mode, our_id), + (their_mode, their_id), + ) = if matches!(our_mode.kind(), EntryKind::Link | EntryKind::Tree) { + ( + Original, + labels.other.unwrap_or_default(), + (*our_mode, *our_id), + (*their_mode, *their_id), + ) + } else { + ( + Swapped, + labels.current.unwrap_or_default(), + (*their_mode, *their_id), + (*our_mode, *our_id), + ) + }; + let tree_with_rename = pick_our_tree(logical_side, their_tree, our_tree); + let renamed_location = unique_path_in_tree( + location.as_bstr(), + &editor, + tree_with_rename, + label_of_side_to_be_moved, + )?; + editor.upsert(toc(location), our_mode.kind(), our_id)?; + let conflict = Conflict::without_resolution( + ResolutionFailure::OursAddedTheirsAddedTypeMismatch { + their_unique_location: renamed_location.clone(), + }, + (ours, theirs, logical_side, outer_side), + ); + + let new_change = Change::Addition { + location: renamed_location, + entry_mode: their_mode, + id: their_id, + relation: None, + }; + tree_with_rename.remove_existing_leaf(location.as_bstr()); + push_deferred( + (new_change, None), + pick_our_changes_mut(logical_side, their_changes, our_changes), + ); + Some(conflict) + } else { + None + }; + + if let Some(conflict) = conflict { + if should_fail_on_conflict(conflict) { + break 'outer; + }; + } + } + ( + Change::Modification { + location, + entry_mode, + id, + .. + }, + Change::Deletion { .. }, + ) + | ( + Change::Deletion { .. }, + Change::Modification { + location, + entry_mode, + id, + .. + }, + ) if allow_resolution_failure => { + let (label_of_side_to_be_moved, side) = if matches!(ours, Change::Modification { .. 
}) { + (labels.current.unwrap_or_default(), Original) + } else { + (labels.other.unwrap_or_default(), Swapped) + }; + let deletion_prefaces_addition_of_directory = { + let change_on_right = match side { + Original => their_changes.get(theirs_idx + 1), + Swapped => our_changes.get(ours_idx + 1), + }; + change_on_right + .map(|change| { + change.inner.entry_mode().is_tree() && change.inner.location() == location + }) + .unwrap_or_default() + }; + + if deletion_prefaces_addition_of_directory { + let our_tree = pick_our_tree(side, our_tree, their_tree); + let renamed_path = unique_path_in_tree( + location.as_bstr(), + &editor, + our_tree, + label_of_side_to_be_moved, + )?; + editor.remove(toc(location))?; + our_tree.remove_existing_leaf(location.as_bstr()); + + let new_change = Change::Addition { + location: renamed_path.clone(), + relation: None, + entry_mode: *entry_mode, + id: *id, + }; + let should_break = should_fail_on_conflict(Conflict::without_resolution( + ResolutionFailure::OursModifiedTheirsDirectoryThenOursRenamed { + renamed_unique_path_to_modified_blob: renamed_path, + }, + (ours, theirs, side, outer_side), + )); + + // Since we move *our* side, our tree needs to be modified. + push_deferred( + (new_change, None), + pick_our_changes_mut(side, our_changes, their_changes), + ); + + if should_break { + break 'outer; + }; + } else { + let should_break = should_fail_on_conflict(Conflict::without_resolution( + ResolutionFailure::OursModifiedTheirsDeleted, + (ours, theirs, side, outer_side), + )); + editor.upsert(toc(location), entry_mode.kind(), *id)?; + if should_break { + break 'outer; + } + } + } + ( + Change::Rewrite { + source_location, + source_entry_mode, + source_id, + entry_mode: our_mode, + id: our_id, + location: our_location, + .. + }, + Change::Rewrite { + entry_mode: their_mode, + id: their_id, + location: their_location, + .. + }, + // NOTE: renames are only tracked among these kinds of types anyway, but we make sure. 
+ ) if our_mode.is_blob_or_symlink() && their_mode.is_blob_or_symlink() => { + let (merged_blob_id, mut resolution) = if our_id == their_id { + (*our_id, None) + } else { + let (id, resolution) = perform_blob_merge( + labels, + objects, + blob_merge, + &mut diff_state.buf1, + &mut write_blob_to_odb, + (our_location, *our_id, *our_mode), + (their_location, *their_id, *their_mode), + (source_location, *source_id, *source_entry_mode), + (1, outer_side), + &options, + )?; + (id, Some(resolution)) + }; + + let merged_mode = + merge_modes(*our_mode, *their_mode).expect("this case was assured earlier"); + + editor.remove(toc(source_location))?; + our_tree.remove_existing_leaf(source_location.as_bstr()); + their_tree.remove_existing_leaf(source_location.as_bstr()); + + let their_rewritten_location = + possibly_rewritten_location(our_tree, their_location.as_bstr(), our_changes); + let our_rewritten_location = + possibly_rewritten_location(their_tree, our_location.as_bstr(), their_changes); + let (our_addition, their_addition) = + match (our_rewritten_location, their_rewritten_location) { + (None, Some(location)) => ( + None, + Some(Change::Addition { + location, + relation: None, + entry_mode: merged_mode, + id: merged_blob_id, + }), + ), + (Some(location), None) => ( + None, + Some(Change::Addition { + location, + relation: None, + entry_mode: merged_mode, + id: merged_blob_id, + }), + ), + (Some(_ours), Some(_theirs)) => { + gix_trace::debug!( + "Found two rewritten locations, '{_ours}' and '{_theirs}'" + ); + // Pretend this is the end of the loop and keep this as conflict. + // If this happens in the wild, we'd want to reproduce it. 
+ if allow_resolution_failure + && should_fail_on_conflict(Conflict::without_resolution( + ResolutionFailure::Unknown, + (ours, theirs, Original, outer_side), + )) + { + break 'outer; + }; + their_changes[theirs_idx].was_written = true; + our_changes[ours_idx].was_written = true; + continue; + } + (None, None) => { + if our_location == their_location { + ( + None, + Some(Change::Addition { + location: our_location.to_owned(), + relation: None, + entry_mode: merged_mode, + id: merged_blob_id, + }), + ) + } else { + if !allow_resolution_failure { + their_changes[theirs_idx].was_written = true; + our_changes[ours_idx].was_written = true; + continue; + } + if should_fail_on_conflict(Conflict::without_resolution( + ResolutionFailure::OursRenamedTheirsRenamedDifferently { + merged_blob: resolution.take().map(|resolution| ContentMerge { + resolution, + merged_blob_id, + }), + }, + (ours, theirs, Original, outer_side), + )) { + break 'outer; + }; + let our_addition = Change::Addition { + location: our_location.to_owned(), + relation: None, + entry_mode: merged_mode, + id: merged_blob_id, + }; + let their_addition = Change::Addition { + location: their_location.to_owned(), + relation: None, + entry_mode: merged_mode, + id: merged_blob_id, + }; + (Some(our_addition), Some(their_addition)) + } + } + }; + + if let Some(resolution) = resolution { + if should_fail_on_conflict(Conflict::with_resolution( + Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { + merged_blob: ContentMerge { + resolution, + merged_blob_id, + }, + }, + (ours, theirs, Original, outer_side), + )) { + break 'outer; + }; + } + if let Some(addition) = our_addition { + push_deferred((addition, Some(ours_idx)), our_changes); + } + if let Some(addition) = their_addition { + push_deferred((addition, Some(theirs_idx)), their_changes); + } + } + ( + Change::Deletion { .. }, + Change::Rewrite { + source_location, + entry_mode: rewritten_mode, + id: rewritten_id, + location, + .. 
+ }, + ) + | ( + Change::Rewrite { + source_location, + entry_mode: rewritten_mode, + id: rewritten_id, + location, + .. + }, + Change::Deletion { .. }, + ) if !rewritten_mode.is_commit() && allow_resolution_failure => { + let side = if matches!(ours, Change::Deletion { .. }) { + Original + } else { + Swapped + }; + + editor.remove(toc(source_location))?; + pick_our_tree(side, our_tree, their_tree) + .remove_existing_leaf(source_location.as_bstr()); + + let their_rewritten_location = possibly_rewritten_location( + pick_our_tree(side, our_tree, their_tree), + location.as_ref(), + pick_our_changes(side, our_changes, their_changes), + ) + .unwrap_or_else(|| location.to_owned()); + let our_addition = Change::Addition { + location: their_rewritten_location, + relation: None, + entry_mode: *rewritten_mode, + id: *rewritten_id, + }; + + if should_fail_on_conflict(Conflict::without_resolution( + ResolutionFailure::OursDeletedTheirsRenamed, + (ours, theirs, side, outer_side), + )) { + break 'outer; + }; + + push_deferred( + (our_addition, None), + pick_our_changes_mut(side, their_changes, our_changes), + ); + } + ( + Change::Rewrite { + source_location, + source_entry_mode, + source_id, + entry_mode: our_mode, + id: our_id, + location, + .. + }, + Change::Addition { + id: their_id, + entry_mode: their_mode, + .. + }, + ) + | ( + Change::Addition { + id: their_id, + entry_mode: their_mode, + .. + }, + Change::Rewrite { + source_location, + source_entry_mode, + source_id, + entry_mode: our_mode, + id: our_id, + location, + .. + }, + ) if !involves_submodule(our_mode, their_mode) => { + let side = if matches!(ours, Change::Rewrite { .. 
}) { + Original + } else { + Swapped + }; + if let Some(merged_mode) = merge_modes(*our_mode, *their_mode) { + let (merged_blob_id, resolution) = if our_id == their_id { + (*our_id, None) + } else { + let (id, resolution) = perform_blob_merge( + labels, + objects, + blob_merge, + &mut diff_state.buf1, + &mut write_blob_to_odb, + (location, *our_id, *our_mode), + (location, *their_id, *their_mode), + (source_location, source_id.kind().null(), *source_entry_mode), + (0, outer_side), + &options, + )?; + (id, Some(resolution)) + }; + + editor.remove(toc(source_location))?; + pick_our_tree(side, our_tree, their_tree).remove_leaf(source_location.as_bstr()); + + if let Some(resolution) = resolution { + if should_fail_on_conflict(Conflict::with_resolution( + Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { + merged_blob: ContentMerge { + resolution, + merged_blob_id, + }, + }, + (ours, theirs, Original, outer_side), + )) { + break 'outer; + }; + } + + // Because this constellation can only be found by the lookup tree, there is + // no need to put it as addition, we know it's not going to intersect on the other side. 
+ editor.upsert(toc(location), merged_mode.kind(), merged_blob_id)?; + } else if allow_resolution_failure { + editor.remove(toc(source_location))?; + pick_our_tree(side, our_tree, their_tree).remove_leaf(source_location.as_bstr()); + + let ( + logical_side, + label_of_side_to_be_moved, + (our_mode, our_id), + (their_mode, their_id), + ) = if matches!(our_mode.kind(), EntryKind::Link | EntryKind::Tree) { + ( + Original, + labels.other.unwrap_or_default(), + (*our_mode, *our_id), + (*their_mode, *their_id), + ) + } else { + ( + Swapped, + labels.current.unwrap_or_default(), + (*their_mode, *their_id), + (*our_mode, *our_id), + ) + }; + let tree_with_rename = pick_our_tree(logical_side, their_tree, our_tree); + let renamed_location = unique_path_in_tree( + location.as_bstr(), + &editor, + tree_with_rename, + label_of_side_to_be_moved, + )?; + editor.upsert(toc(location), our_mode.kind(), our_id)?; + let conflict = Conflict::without_resolution( + ResolutionFailure::OursAddedTheirsAddedTypeMismatch { + their_unique_location: renamed_location.clone(), + }, + (ours, theirs, side, outer_side), + ); + + let new_change_with_rename = Change::Addition { + location: renamed_location, + entry_mode: their_mode, + id: their_id, + relation: None, + }; + tree_with_rename.remove_existing_leaf(location.as_bstr()); + push_deferred( + ( + new_change_with_rename, + Some(pick_idx(logical_side, theirs_idx, ours_idx)), + ), + pick_our_changes_mut(logical_side, their_changes, our_changes), + ); + + if should_fail_on_conflict(conflict) { + break 'outer; + } + } + } + _unknown => { + if allow_resolution_failure + && should_fail_on_conflict(Conflict::without_resolution( + ResolutionFailure::Unknown, + (ours, theirs, Original, outer_side), + )) + { + break 'outer; + }; + } + } + their_changes[theirs_idx].was_written = true; + our_changes[ours_idx].was_written = true; + } + } + } + segment_start = last_seen_len; + last_seen_len = their_changes.len(); + } + + ((our_changes, our_tree), 
(their_changes, their_tree)) = ((their_changes, their_tree), (our_changes, our_tree)); + (labels.current, labels.other) = (labels.other, labels.current); + outer_side = outer_side.swapped(); + } + + Ok(Outcome { + tree: editor, + conflicts, + failed_on_first_unresolved_conflict: failed_on_first_conflict, + }) +} + +pub(super) fn is_unresolved(conflicts: &[Conflict], how: UnresolvedConflict) -> bool { + match how { + UnresolvedConflict::ConflictMarkers => conflicts.iter().any(|c| { + c.resolution.is_err() + || c.content_merge().map_or(false, |info| { + matches!(info.resolution, crate::blob::Resolution::Conflict) + }) + }), + UnresolvedConflict::Renames => conflicts.iter().any(|c| match &c.resolution { + Ok(success) => match success { + Resolution::SourceLocationAffectedByRename { .. } + | Resolution::OursModifiedTheirsRenamedAndChangedThenRename { .. } => true, + Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => { + matches!(merged_blob.resolution, crate::blob::Resolution::Conflict) + } + }, + Err(_failure) => true, + }), + } +} + +fn involves_submodule(a: &EntryMode, b: &EntryMode) -> bool { + a.is_commit() || b.is_commit() +} + +/// Allows equal modes or preferes executables bits in case of blobs +fn merge_modes(a: EntryMode, b: EntryMode) -> Option { + match (a.kind(), b.kind()) { + (EntryKind::BlobExecutable, EntryKind::BlobExecutable | EntryKind::Blob) + | (EntryKind::Blob, EntryKind::BlobExecutable) => Some(EntryKind::BlobExecutable.into()), + (_, _) if a == b => Some(a), + _ => None, + } +} + +fn push_deferred(change_and_idx: (Change, Option), changes: &mut ChangeList) { + push_deferred_with_rewrite(change_and_idx, None, changes); +} + +fn push_deferred_with_rewrite( + (change, ours_idx): (Change, Option), + new_location: Option<(BString, usize)>, + changes: &mut ChangeList, +) { + changes.push(TrackedChange { + inner: change, + was_written: false, + needs_tree_insertion: Some(ours_idx), + rewritten_location: new_location, + }); +} 
+ +fn pick_our_tree<'a>(side: ConflictMapping, ours: &'a mut TreeNodes, theirs: &'a mut TreeNodes) -> &'a mut TreeNodes { + match side { + Original => ours, + Swapped => theirs, + } +} + +fn pick_our_changes<'a>( + side: ConflictMapping, + ours: &'a ChangeListRef, + theirs: &'a ChangeListRef, +) -> &'a ChangeListRef { + match side { + Original => ours, + Swapped => theirs, + } +} + +fn pick_idx(side: ConflictMapping, ours: usize, theirs: usize) -> usize { + match side { + Original => ours, + Swapped => theirs, + } +} + +fn pick_our_changes_mut<'a>( + side: ConflictMapping, + ours: &'a mut ChangeList, + theirs: &'a mut ChangeList, +) -> &'a mut ChangeList { + match side { + Original => ours, + Swapped => theirs, + } +} diff --git a/gix-merge/src/tree/mod.rs b/gix-merge/src/tree/mod.rs new file mode 100644 index 00000000000..cfc47e6de28 --- /dev/null +++ b/gix-merge/src/tree/mod.rs @@ -0,0 +1,260 @@ +use bstr::BString; +use gix_diff::tree_with_rewrites::Change; +use gix_diff::Rewrites; + +/// The error returned by [`tree()`](crate::tree()). 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not find ancestor, our or their tree to get started")] + FindTree(#[from] gix_object::find::existing_object::Error), + #[error("Could not find ancestor, our or their tree iterator to get started")] + FindTreeIter(#[from] gix_object::find::existing_iter::Error), + #[error("Failed to diff our side or their side")] + DiffTree(#[from] gix_diff::tree_with_rewrites::Error), + #[error("Could not apply merge result to base tree")] + TreeEdit(#[from] gix_object::tree::editor::Error), + #[error("Failed to load resource to prepare for blob merge")] + BlobMergeSetResource(#[from] crate::blob::platform::set_resource::Error), + #[error(transparent)] + BlobMergePrepare(#[from] crate::blob::platform::prepare_merge::Error), + #[error(transparent)] + BlobMerge(#[from] crate::blob::platform::merge::Error), + #[error("Failed to write merged blob content as blob to the object database")] + WriteBlobToOdb(Box), + #[error("The merge was performed, but the binary merge result couldn't be selected as it wasn't found")] + MergeResourceNotFound, +} + +/// The outcome produced by [`tree()`](crate::tree()). +#[derive(Clone)] +pub struct Outcome<'a> { + /// The ready-made (but unwritten) which is the *base* tree, including all non-conflicting changes, and the changes that had + /// conflicts which could be resolved automatically. + /// + /// This means, if all of their changes were conflicting, this will be equivalent to the *base* tree. + pub tree: gix_object::tree::Editor<'a>, + /// The set of conflicts we encountered. Can be empty to indicate there was no conflict. + /// Note that conflicts might have been auto-resolved, but they are listed here for completeness. + /// Use [`has_unresolved_conflicts()`](Outcome::has_unresolved_conflicts()) to see if any action is needed + /// before using [`tree`](Outcome::tree). 
+ pub conflicts: Vec, + /// `true` if `conflicts` contains only a single *unresolved* conflict in the last slot, but possibly more resolved ones. + /// This also makes this outcome a very partial merge that cannot be completed. + /// Only set if [`fail_on_conflict`](Options::fail_on_conflict) is `true`. + pub failed_on_first_unresolved_conflict: bool, +} + +/// Determine what should be considered an unresolved conflict. +/// +/// Note that no matter which variant, [conflicts](Conflict) with [resolution failure](`ResolutionFailure`) +/// will always be unresolved. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum UnresolvedConflict { + /// Only consider content merges with conflict markers as unresolved. + ConflictMarkers, + /// Whenever there was any rename, or conflict markers, it is unresolved. + Renames, +} + +impl Outcome<'_> { + /// Return `true` if there is any conflict that would still need to be resolved as they would yield undesirable trees. + /// This is based on `how` to determine what should be considered unresolved. + pub fn has_unresolved_conflicts(&self, how: UnresolvedConflict) -> bool { + function::is_unresolved(&self.conflicts, how) + } +} + +/// A description of a conflict (i.e. merge issue without an auto-resolution) as seen during a [tree-merge](crate::tree()). +/// They may have a resolution that was applied automatically, or be left for the caller to resolved. +#[derive(Debug, Clone)] +pub struct Conflict { + /// A record on how the conflict resolution succeeded with `Ok(_)` or failed with `Err(_)`. + /// In case of `Err(_)`, no write + /// On failure, one can examine `ours` and `theirs` to potentially find a custom solution. + /// Note that the descriptions of resolutions or resolution failures may be swapped compared + /// to the actual changes. This is due to changes like `modification|deletion` being treated the + /// same as `deletion|modification`, i.e. *ours* is not more privileged than theirs. 
+ /// To compensate for that, use [`changes_in_resolution()`](Conflict::changes_in_resolution()). + pub resolution: Result, + /// The change representing *our* side. + pub ours: Change, + /// The change representing *their* side. + pub theirs: Change, + map: ConflictMapping, +} + +/// A utility to help define which side is what. +#[derive(Debug, Clone, Copy)] +enum ConflictMapping { + Original, + Swapped, +} + +impl ConflictMapping { + fn is_swapped(&self) -> bool { + matches!(self, ConflictMapping::Swapped) + } + fn swapped(self) -> ConflictMapping { + match self { + ConflictMapping::Original => ConflictMapping::Swapped, + ConflictMapping::Swapped => ConflictMapping::Original, + } + } +} + +impl Conflict { + /// Returns the changes of fields `ours` and `theirs` so they match their description in the + /// [`Resolution`] or [`ResolutionFailure`] respectively. + /// Without this, the sides may appear swapped as `ours|theirs` is treated the same as `theirs/ours` + /// if both types are different, like `modification|deletion`. + pub fn changes_in_resolution(&self) -> (&Change, &Change) { + match self.map { + ConflictMapping::Original => (&self.ours, &self.theirs), + ConflictMapping::Swapped => (&self.theirs, &self.ours), + } + } + + /// Similar to [`changes_in_resolution()`](Self::changes_in_resolution()), but returns the parts + /// of the structure so the caller can take ownership. This can be useful when applying your own + /// resolutions for resolution failures. + pub fn into_parts_by_resolution(self) -> (Result, Change, Change) { + match self.map { + ConflictMapping::Original => (self.resolution, self.ours, self.theirs), + ConflictMapping::Swapped => (self.resolution, self.theirs, self.ours), + } + } + + /// Return information about the content merge if it was performed. + pub fn content_merge(&self) -> Option { + match &self.resolution { + Ok(success) => match success { + Resolution::SourceLocationAffectedByRename { .. 
} => None, + Resolution::OursModifiedTheirsRenamedAndChangedThenRename { merged_blob, .. } => *merged_blob, + Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => Some(*merged_blob), + }, + Err(failure) => match failure { + ResolutionFailure::OursRenamedTheirsRenamedDifferently { merged_blob } => *merged_blob, + ResolutionFailure::Unknown + | ResolutionFailure::OursModifiedTheirsDeleted + | ResolutionFailure::OursModifiedTheirsRenamedTypeMismatch + | ResolutionFailure::OursModifiedTheirsDirectoryThenOursRenamed { + renamed_unique_path_to_modified_blob: _, + } + | ResolutionFailure::OursAddedTheirsAddedTypeMismatch { .. } + | ResolutionFailure::OursDeletedTheirsRenamed => None, + }, + } + } +} + +/// Describes of a conflict involving *our* change and *their* change was specifically resolved. +/// +/// Note that all resolutions are side-agnostic, so *ours* could also have been *theirs* and vice versa. +/// Also note that symlink merges are always done via binary merge, using the same logic. +#[derive(Debug, Clone)] +pub enum Resolution { + /// *ours* had a renamed directory and *theirs* made a change in the now renamed directory. + /// We moved that change into its location. + SourceLocationAffectedByRename { + /// The repository-relative path to the location that the change ended up in after + /// being affected by a renamed directory. + final_location: BString, + }, + /// *ours* was a modified blob and *theirs* renamed that blob. + /// We moved the changed blob from *ours* to its new location, and merged it successfully. + /// If this is a `copy`, the source of the copy was set to be the changed blob as well so both match. + OursModifiedTheirsRenamedAndChangedThenRename { + /// If one side added the executable bit, we always add it in the merged result. + merged_mode: Option, + /// If `Some(…)`, the content of the involved blob had to be merged. 
+ merged_blob: Option, + /// The repository relative path to the location the blob finally ended up in. + /// It's `Some()` only if *they* rewrote the blob into a directory which *we* renamed on *our* side. + final_location: Option, + }, + /// *ours* and *theirs* carried changes and where content-merged. + /// + /// Note that *ours* and *theirs* may also be rewrites with the same destination and mode, + /// or additions. + OursModifiedTheirsModifiedThenBlobContentMerge { + /// The outcome of the content merge. + merged_blob: ContentMerge, + }, +} + +/// Describes of a conflict involving *our* change and *their* failed to be resolved. +#[derive(Debug, Clone)] +pub enum ResolutionFailure { + /// *ours* was renamed, but *theirs* was renamed differently. Both versions will be present in the tree, + OursRenamedTheirsRenamedDifferently { + /// If `Some(…)`, the content of the involved blob had to be merged. + merged_blob: Option, + }, + /// *ours* was modified, but *theirs* was turned into a directory, so *ours* was renamed to a non-conflicting path. + OursModifiedTheirsDirectoryThenOursRenamed { + /// The path at which `ours` can be found in the tree - it's in the same directory that it was in before. + renamed_unique_path_to_modified_blob: BString, + }, + /// *ours* was added (or renamed into place) with a different mode than theirs, e.g. blob and symlink, and we kept + /// the symlink in its original location, renaming the other side to `their_unique_location`. + OursAddedTheirsAddedTypeMismatch { + /// The location at which *their* state was placed to resolve the name and type clash, named to indicate + /// where the entry is coming from. + their_unique_location: BString, + }, + /// *ours* was modified, and they renamed the same file, but there is also a non-mergable type-change. + /// Here we keep both versions of the file. + OursModifiedTheirsRenamedTypeMismatch, + /// *ours* was deleted, but *theirs* was renamed. 
+ OursDeletedTheirsRenamed, + /// *ours* was modified and *theirs* was deleted. We keep the modified one and ignore the deletion. + OursModifiedTheirsDeleted, + /// *ours* and *theirs* are in an untested state so it can't be handled yet, and is considered a conflict + /// without adding our *or* their side to the resulting tree. + Unknown, +} + +/// Information about a blob content merge for use in a [`Resolution`]. +/// Note that content merges always count as success to avoid duplication of cases, which forces callers +/// to check for the [`resolution`](Self::resolution) field. +#[derive(Debug, Copy, Clone)] +pub struct ContentMerge { + /// The fully merged blob. + pub merged_blob_id: gix_hash::ObjectId, + /// Identify the kind of resolution of the blob merge. Note that it may be conflicting. + pub resolution: crate::blob::Resolution, +} + +/// A way to configure [`tree()`](crate::tree()). +#[derive(Default, Debug, Clone)] +pub struct Options { + /// If *not* `None`, rename tracking will be performed when determining the changes of each side of the merge. + pub rewrites: Option, + /// Decide how blob-merges should be done. This relates to if conflicts can be resolved or not. + pub blob_merge: crate::blob::platform::merge::Options, + /// The context to use when invoking merge-drivers. + pub blob_merge_command_ctx: gix_command::Context, + /// If `Some(what-is-unresolved)`, the first unresolved conflict will cause the entire merge to stop. + /// This is useful to see if there is any conflict, without performing the whole operation. + // TODO: Maybe remove this if the cost of figuring out conflicts is so low - after all, the data structures + // and initial diff is the expensive thing right now, which are always done upfront. + // However, this could change once we know do everything during the traversal, which probably doesn't work + // without caching stuff and is too complicated to actually do. 
+ pub fail_on_conflict: Option, + /// This value also affects the size of merge-conflict markers, to allow differentiating + /// merge conflicts on each level, for any value greater than 0, with values `N` causing `N*2` + /// markers to be added to the configured value. + /// + /// This is used automatically when merging merge-bases recursively. + pub marker_size_multiplier: u8, + /// If `None`, when symlinks clash *ours* will be chosen and a conflict will occur. + /// Otherwise, the same logic applies as for the merge of binary resources. + pub symlink_conflicts: Option, + /// If `true`, instead of issuing a conflict with [`ResolutionFailure`], do nothing and keep the base/ancestor + /// version. This is useful when one wants to avoid any kind of merge-conflict to have *some*, *lossy* resolution. + pub allow_lossy_resolution: bool, +} + +pub(super) mod function; +mod utils; diff --git a/gix-merge/src/tree/utils.rs b/gix-merge/src/tree/utils.rs new file mode 100644 index 00000000000..cd37f809e1e --- /dev/null +++ b/gix-merge/src/tree/utils.rs @@ -0,0 +1,572 @@ +//! ## About `debug_assert!() +//! +//! The idea is to have code that won't panic in production. Thus, if in production that assertion would fail, +//! we will rather let the code run and hope it will either be correct enough or fail in more graceful ways later. +//! +//! Once such a case becomes a bug and is reproduced in testing, the debug-assertion will kick in and hopefully +//! contribute to finding a fix faster. 
+use crate::blob::builtin_driver::binary::Pick; +use crate::blob::ResourceKind; +use crate::tree::{Conflict, ConflictMapping, Error, Options, Resolution, ResolutionFailure}; +use bstr::ByteSlice; +use bstr::{BStr, BString, ByteVec}; +use gix_diff::tree_with_rewrites::{Change, ChangeRef}; +use gix_hash::ObjectId; +use gix_object::tree; +use gix_object::tree::{EntryKind, EntryMode}; +use std::collections::HashMap; + +/// Assuming that `their_location` is the destination of *their* rewrite, check if *it* passes +/// over a directory rewrite in *our* tree. If so, rewrite it so that we get the path +/// it would have had if it had been renamed along with *our* directory. +pub fn possibly_rewritten_location( + check_tree: &mut TreeNodes, + their_location: &BStr, + our_changes: &ChangeListRef, +) -> Option { + check_tree.check_conflict(their_location).and_then(|pc| match pc { + PossibleConflict::PassedRewrittenDirectory { change_idx } => { + let passed_change = &our_changes[change_idx]; + rewrite_location_with_renamed_directory(their_location, &passed_change.inner) + } + _ => None, + }) +} + +pub fn rewrite_location_with_renamed_directory(their_location: &BStr, passed_change: &Change) -> Option { + match passed_change { + Change::Rewrite { + source_location, + location, + .. + } if passed_change.entry_mode().is_tree() => { + // This is safe even without dealing with slashes as we found this rewrite + // by walking each component, and we know it's a tree for added safety. + let suffix = their_location.strip_prefix(source_location.as_bytes())?; + let mut rewritten = location.to_owned(); + rewritten.push_str(suffix); + Some(rewritten) + } + _ => None, + } +} + +/// Produce a unique path within the directory that contains the file at `file_path` like `a/b`, using `editor` +/// and `tree` to assure unique names, to obtain the tree at `a/` and `side_name` to more clearly signal +/// where the file is coming from. 
+pub fn unique_path_in_tree( + file_path: &BStr, + editor: &tree::Editor<'_>, + tree: &mut TreeNodes, + side_name: &BStr, +) -> Result { + let mut buf = file_path.to_owned(); + buf.push(b'~'); + buf.extend( + side_name + .as_bytes() + .iter() + .copied() + .map(|b| if b == b'/' { b'_' } else { b }), + ); + + // We could use a cursor here, but clashes are so unlikely that this wouldn't be meaningful for performance. + let base_len = buf.len(); + let mut suffix = 0; + while editor.get(to_components_bstring_ref(&buf)).is_some() || tree.check_conflict(buf.as_bstr()).is_some() { + buf.truncate(base_len); + buf.push_str(format!("_{suffix}",)); + suffix += 1; + } + Ok(buf) +} + +/// Perform a merge between two blobs and return the result of its object id. +#[allow(clippy::too_many_arguments)] +pub fn perform_blob_merge( + mut labels: crate::blob::builtin_driver::text::Labels<'_>, + objects: &impl gix_object::FindObjectOrHeader, + blob_merge: &mut crate::blob::Platform, + buf: &mut Vec, + write_blob_to_odb: &mut impl FnMut(&[u8]) -> Result, + (our_location, our_id, our_mode): (&BString, ObjectId, EntryMode), + (their_location, their_id, their_mode): (&BString, ObjectId, EntryMode), + (previous_location, previous_id, previous_mode): (&BString, ObjectId, EntryMode), + (extra_markers, outer_side): (u8, ConflictMapping), + options: &Options, +) -> Result<(ObjectId, crate::blob::Resolution), Error> +where + E: Into>, +{ + if matches!(our_mode.kind(), EntryKind::Link) && matches!(their_mode.kind(), EntryKind::Link) { + let (pick, resolution) = crate::blob::builtin_driver::binary(options.symlink_conflicts); + let (our_id, their_id) = match outer_side { + ConflictMapping::Original => (our_id, their_id), + ConflictMapping::Swapped => (their_id, our_id), + }; + let id = match pick { + Pick::Ancestor => previous_id, + Pick::Ours => our_id, + Pick::Theirs => their_id, + }; + return Ok((id, resolution)); + } + let (our_kind, their_kind) = match outer_side { + ConflictMapping::Original 
=> (ResourceKind::CurrentOrOurs, ResourceKind::OtherOrTheirs), + ConflictMapping::Swapped => (ResourceKind::OtherOrTheirs, ResourceKind::CurrentOrOurs), + }; + blob_merge.set_resource(our_id, our_mode.kind(), our_location.as_bstr(), our_kind, objects)?; + blob_merge.set_resource( + their_id, + their_mode.kind(), + their_location.as_bstr(), + their_kind, + objects, + )?; + blob_merge.set_resource( + previous_id, + previous_mode.kind(), + previous_location.as_bstr(), + ResourceKind::CommonAncestorOrBase, + objects, + )?; + + fn combined(side: &BStr, location: &BString) -> BString { + let mut buf = side.to_owned(); + buf.push_byte(b':'); + buf.push_str(location); + buf + } + + if outer_side.is_swapped() { + (labels.current, labels.other) = (labels.other, labels.current); + } + + let (ancestor, current, other); + let labels = if our_location == their_location { + labels + } else { + ancestor = labels.ancestor.map(|side| combined(side, previous_location)); + current = labels.current.map(|side| combined(side, our_location)); + other = labels.other.map(|side| combined(side, their_location)); + crate::blob::builtin_driver::text::Labels { + ancestor: ancestor.as_ref().map(|n| n.as_bstr()), + current: current.as_ref().map(|n| n.as_bstr()), + other: other.as_ref().map(|n| n.as_bstr()), + } + }; + let prep = blob_merge.prepare_merge(objects, with_extra_markers(options, extra_markers))?; + let (pick, resolution) = prep.merge(buf, labels, &options.blob_merge_command_ctx)?; + + let merged_blob_id = prep + .id_by_pick(pick, buf, write_blob_to_odb) + .map_err(|err| Error::WriteBlobToOdb(err.into()))? + .ok_or(Error::MergeResourceNotFound)?; + Ok((merged_blob_id, resolution)) +} + +fn with_extra_markers(opts: &Options, extra_makers: u8) -> crate::blob::platform::merge::Options { + let mut out = opts.blob_merge; + if let crate::blob::builtin_driver::text::Conflict::Keep { marker_size, .. 
} = &mut out.text.conflict { + *marker_size = + marker_size.saturating_add(extra_makers.saturating_add(opts.marker_size_multiplier.saturating_mul(2))); + } + out +} + +/// A way to attach metadata to each change. +#[derive(Debug)] +pub struct TrackedChange { + /// The actual change + pub inner: Change, + /// If `true`, this change counts as written to the tree using a [`tree::Editor`]. + pub was_written: bool, + /// If `Some(ours_idx_to_ignore)`, this change must be placed into the tree before handling it. + /// This makes sure that new changes aren't visible too early, which would mean the algorithm + /// knows things too early which can be misleading. + /// The `ours_idx_to_ignore` assures that the same rewrite won't be used as matching side, which + /// would lead to strange effects. Only set if it's a rewrite though. + pub needs_tree_insertion: Option>, + /// A new `(location, change_idx)` pair for the change that can happen if the location is touching a rewrite in a parent + /// directory, but otherwise doesn't have a match. This means we shall redo the operation but with + /// the changed path. + /// The second tuple entry `change_idx` is the change-idx we passed over, which refers to the other side that interfered. + pub rewritten_location: Option<(BString, usize)>, +} + +pub type ChangeList = Vec; +pub type ChangeListRef = [TrackedChange]; + +/// Only keep leaf nodes, or trees that are the renamed, pushing `change` on `changes`. +/// Doing so makes it easy to track renamed or rewritten or copied directories, and properly +/// handle *their* changes that fall within them. +/// Note that it also rewrites `change` if it is a copy, turning it into an addition so copies don't have an effect +/// on the merge algorithm. +pub fn track(change: ChangeRef<'_>, changes: &mut ChangeList) { + if change.entry_mode().is_tree() && matches!(change, ChangeRef::Modification { .. 
}) { + return; + } + let is_tree = change.entry_mode().is_tree(); + changes.push(TrackedChange { + inner: match change.into_owned() { + Change::Rewrite { + id, + entry_mode, + location, + relation, + copy, + .. + } if copy => Change::Addition { + location, + relation, + entry_mode, + id, + }, + other => other, + }, + was_written: is_tree, + needs_tree_insertion: None, + rewritten_location: None, + }); +} + +/// Unconditionally apply `change` to `editor`. +pub fn apply_change( + editor: &mut tree::Editor<'_>, + change: &Change, + alternative_location: Option<&BString>, +) -> Result<(), tree::editor::Error> { + use to_components_bstring_ref as to_components; + if change.entry_mode().is_tree() { + return Ok(()); + } + + let (location, mode, id) = match change { + Change::Addition { + location, + entry_mode, + id, + .. + } + | Change::Modification { + location, + entry_mode, + id, + .. + } => (location, entry_mode, id), + Change::Deletion { location, .. } => { + editor.remove(to_components(alternative_location.unwrap_or(location)))?; + return Ok(()); + } + Change::Rewrite { + source_location, + entry_mode, + id, + location, + copy, + .. + } => { + if !*copy { + editor.remove(to_components(source_location))?; + } + (location, entry_mode, id) + } + }; + + editor.upsert( + to_components(alternative_location.unwrap_or(location)), + mode.kind(), + *id, + )?; + Ok(()) +} + +/// A potential conflict that needs to be checked. It comes in several varieties and always happens +/// if paths overlap in some way between *theirs* and *ours*. +#[derive(Debug)] +pub enum PossibleConflict { + /// *our* changes have a tree here, but *they* place a non-tree or edit an existing item (that we removed). + TreeToNonTree { + /// The possibly available change at this node. + change_idx: Option, + }, + /// A non-tree in *our* tree turned into a tree in *theirs* - this can be done with additions in *theirs*, + /// or if we added a blob, while they added a directory. 
+ NonTreeToTree { + /// The possibly available change at this node. + change_idx: Option, + }, + /// A perfect match, i.e. *our* change at `a/b/c` corresponds to *their* change at the same path. + Match { + /// The index to *our* change at *their* path. + change_idx: usize, + }, + /// *their* change at `a/b/c` passed `a/b` which is an index to *our* change indicating a directory that was rewritten, + /// with all its contents being renamed. However, *theirs* has been added *into* that renamed directory. + PassedRewrittenDirectory { change_idx: usize }, +} + +impl PossibleConflict { + pub(super) fn change_idx(&self) -> Option { + match self { + PossibleConflict::TreeToNonTree { change_idx, .. } | PossibleConflict::NonTreeToTree { change_idx, .. } => { + *change_idx + } + PossibleConflict::Match { change_idx, .. } + | PossibleConflict::PassedRewrittenDirectory { change_idx, .. } => Some(*change_idx), + } + } +} + +/// The flat list of all tree-nodes so we can avoid having a linked-tree using pointers +/// which is useful for traversal and initial setup as that can then trivially be non-recursive. +pub struct TreeNodes(Vec); + +/// Trees lead to other trees, or leafs (without children), and it can be represented by a renamed directory. +#[derive(Debug, Default, Clone)] +struct TreeNode { + /// A mapping of path components to their children to quickly see if `theirs` in some way is potentially + /// conflicting with `ours`. + children: HashMap, + /// The index to a change, which is always set if this is a leaf node (with no children), and if there are children and this + /// is a rewritten tree. + change_idx: Option, + /// Keep track of where the location of this node is derived from. + location: ChangeLocation, +} + +#[derive(Debug, Default, Clone, Copy)] +enum ChangeLocation { + /// The change is at its current (and only) location, or in the source location of a rename. + #[default] + CurrentLocation, + /// This is always the destination of a rename. 
+ RenamedLocation, +} + +impl TreeNode { + fn is_leaf_node(&self) -> bool { + self.children.is_empty() + } +} + +impl TreeNodes { + pub fn new() -> Self { + TreeNodes(vec![TreeNode::default()]) + } + + /// Insert our `change` at `change_idx`, into a linked-tree, assuring that each `change` is non-conflicting + /// with this tree structure, i.e. each leaf path is only seen once. + /// Note that directories can be added in between. + pub fn track_change(&mut self, change: &Change, change_idx: usize) { + for (path, location_hint) in [ + Some((change.source_location(), ChangeLocation::CurrentLocation)), + match change { + Change::Addition { .. } | Change::Deletion { .. } | Change::Modification { .. } => None, + Change::Rewrite { location, .. } => Some((location.as_bstr(), ChangeLocation::RenamedLocation)), + }, + ] + .into_iter() + .flatten() + { + let mut components = to_components(path).peekable(); + let mut next_index = self.0.len(); + let mut cursor = &mut self.0[0]; + while let Some(component) = components.next() { + let is_last = components.peek().is_none(); + match cursor.children.get(component).copied() { + None => { + let new_node = TreeNode { + children: Default::default(), + change_idx: is_last.then_some(change_idx), + location: location_hint, + }; + cursor.children.insert(component.to_owned(), next_index); + self.0.push(new_node); + cursor = &mut self.0[next_index]; + next_index += 1; + } + Some(index) => { + cursor = &mut self.0[index]; + if is_last && !cursor.is_leaf_node() { + assert_eq!( + cursor.change_idx, None, + "BUG: each node should only see a single change when tracking initially: {path} {change_idx}" + ); + cursor.change_idx = Some(change_idx); + } + } + } + } + } + } + + /// Search the tree with `our` changes for `theirs` by [`source_location()`](Change::source_location())). + /// If there is an entry but both are the same, or if there is no entry, return `None`. 
+ pub fn check_conflict(&mut self, theirs_location: &BStr) -> Option { + if self.0.len() == 1 { + return None; + } + let components = to_components(theirs_location); + let mut cursor = &mut self.0[0]; + let mut cursor_idx = 0; + let mut intermediate_change = None; + for component in components { + if cursor.change_idx.is_some() { + intermediate_change = cursor.change_idx.map(|change_idx| (change_idx, cursor_idx)); + } + match cursor.children.get(component).copied() { + // *their* change is outside *our* tree + None => { + let res = if cursor.is_leaf_node() { + Some(PossibleConflict::NonTreeToTree { + change_idx: cursor.change_idx, + }) + } else { + // a change somewhere else, i.e. `a/c` and we know `a/b` only. + intermediate_change.and_then(|(change, cursor_idx)| { + let cursor = &mut self.0[cursor_idx]; + // If this is a destination location of a rename, then the `their_location` + // is already at the right spot, and we can just ignore it. + if matches!(cursor.location, ChangeLocation::CurrentLocation) { + Some(PossibleConflict::PassedRewrittenDirectory { change_idx: change }) + } else { + None + } + }) + }; + return res; + } + Some(child_idx) => { + cursor_idx = child_idx; + cursor = &mut self.0[cursor_idx]; + } + } + } + + if cursor.is_leaf_node() { + PossibleConflict::Match { + change_idx: cursor.change_idx.expect("leaf nodes always have a change"), + } + } else { + PossibleConflict::TreeToNonTree { + change_idx: cursor.change_idx, + } + } + .into() + } + + /// Compare both changes and return `true` if they are *not* exactly the same. + /// One two changes are the same, they will have the same effect. 
+ /// Since this is called after [`Self::check_conflict`], *our* change will not be applied, + /// only theirs, which naturally avoids double-application + /// (which shouldn't have side effects, but let's not risk it) + pub fn is_not_same_change_in_possible_conflict( + &self, + theirs: &Change, + conflict: &PossibleConflict, + our_changes: &ChangeListRef, + ) -> bool { + conflict + .change_idx() + .map_or(true, |idx| our_changes[idx].inner != *theirs) + } + + pub fn remove_existing_leaf(&mut self, location: &BStr) { + self.remove_leaf_inner(location, true); + } + + pub fn remove_leaf(&mut self, location: &BStr) { + self.remove_leaf_inner(location, false); + } + + fn remove_leaf_inner(&mut self, location: &BStr, must_exist: bool) { + let mut components = to_components(location).peekable(); + let mut cursor = &mut self.0[0]; + while let Some(component) = components.next() { + match cursor.children.get(component).copied() { + None => assert!(!must_exist, "didn't find '{location}' for removal"), + Some(existing_idx) => { + let is_last = components.peek().is_none(); + if is_last { + cursor.children.remove(component); + cursor = &mut self.0[existing_idx]; + debug_assert!( + cursor.is_leaf_node(), + "BUG: we should really only try to remove leaf nodes" + ); + cursor.change_idx = None; + } else { + cursor = &mut self.0[existing_idx]; + } + } + } + } + } + + /// Insert `new_change` which affects this tree into it and put it into `storage` to obtain the index. + /// Panic if that change already exists as it must be made so that it definitely doesn't overlap with this tree. 
+ pub fn insert(&mut self, new_change: &Change, new_change_idx: usize) { + let mut next_index = self.0.len(); + let mut cursor = &mut self.0[0]; + for component in to_components(new_change.location()) { + match cursor.children.get(component).copied() { + None => { + cursor.children.insert(component.to_owned(), next_index); + self.0.push(TreeNode::default()); + cursor = &mut self.0[next_index]; + next_index += 1; + } + Some(existing_idx) => { + cursor = &mut self.0[existing_idx]; + } + } + } + + debug_assert!( + !matches!(new_change, Change::Rewrite { .. }), + "BUG: we thought we wouldn't do that current.location is related?" + ); + cursor.change_idx = Some(new_change_idx); + cursor.location = ChangeLocation::CurrentLocation; + } +} + +pub fn to_components_bstring_ref(rela_path: &BString) -> impl Iterator { + rela_path.split(|b| *b == b'/').map(Into::into) +} + +pub fn to_components(rela_path: &BStr) -> impl Iterator { + rela_path.split(|b| *b == b'/').map(Into::into) +} + +impl Conflict { + pub(super) fn without_resolution( + resolution: ResolutionFailure, + changes: (&Change, &Change, ConflictMapping, ConflictMapping), + ) -> Self { + Conflict::maybe_resolved(Err(resolution), changes) + } + + pub(super) fn with_resolution( + resolution: Resolution, + changes: (&Change, &Change, ConflictMapping, ConflictMapping), + ) -> Self { + Conflict::maybe_resolved(Ok(resolution), changes) + } + + pub(super) fn maybe_resolved( + resolution: Result, + (ours, theirs, map, outer_map): (&Change, &Change, ConflictMapping, ConflictMapping), + ) -> Self { + Conflict { + resolution, + ours: ours.clone(), + theirs: theirs.clone(), + map: match outer_map { + ConflictMapping::Original => map, + ConflictMapping::Swapped => map.swapped(), + }, + } + } +} diff --git a/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar b/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar index 9105e6caa79..d190eeee11d 100644 Binary files 
a/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar and b/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar differ diff --git a/gix-merge/tests/fixtures/generated-archives/text-baseline.tar b/gix-merge/tests/fixtures/generated-archives/text-baseline.tar index 8bd4e8f2244..c0601e39ede 100644 Binary files a/gix-merge/tests/fixtures/generated-archives/text-baseline.tar and b/gix-merge/tests/fixtures/generated-archives/text-baseline.tar differ diff --git a/gix-merge/tests/fixtures/generated-archives/tree-baseline.tar b/gix-merge/tests/fixtures/generated-archives/tree-baseline.tar new file mode 100644 index 00000000000..4b74b96874d Binary files /dev/null and b/gix-merge/tests/fixtures/generated-archives/tree-baseline.tar differ diff --git a/gix-merge/tests/fixtures/make_blob_repo.sh b/gix-merge/tests/fixtures/make_blob_repo.sh index 8f4d23f38ec..59f3ed046d8 100644 --- a/gix-merge/tests/fixtures/make_blob_repo.sh +++ b/gix-merge/tests/fixtures/make_blob_repo.sh @@ -11,7 +11,7 @@ echo unset > unset echo unspecified > unspecified cat <.gitattributes -just-set merge +just-set merge conflict-marker-size=32 b merge=b union merge=union missing merge=missing diff --git a/gix-merge/tests/fixtures/text-baseline.sh b/gix-merge/tests/fixtures/text-baseline.sh index 17d954aa278..a943d92c3d3 100644 --- a/gix-merge/tests/fixtures/text-baseline.sh +++ b/gix-merge/tests/fixtures/text-baseline.sh @@ -12,8 +12,11 @@ function baseline() { shift 4 git merge-file --stdout "$@" "$ours" "$base" "$theirs" > "$output" || true - echo "$ours" "$base" "$theirs" "$output" "$@" >> baseline.cases + + local output="${output}-reversed" + git merge-file --stdout "$@" "$theirs" "$base" "$ours" > "${output}" || true + echo "$theirs" "$base" "$ours" "${output}" "$@" >> baseline-reversed.cases } mkdir simple diff --git a/gix-merge/tests/fixtures/tree-baseline.sh b/gix-merge/tests/fixtures/tree-baseline.sh new file mode 100644 index 00000000000..928a52abea5 --- /dev/null +++ 
b/gix-merge/tests/fixtures/tree-baseline.sh @@ -0,0 +1,864 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +function tick () { + if test -z "${tick+set}" + then + tick=1112911993 + else + tick=$(($tick + 60)) + fi + GIT_COMMITTER_DATE="$tick -0700" + GIT_AUTHOR_DATE="$tick -0700" + export GIT_COMMITTER_DATE GIT_AUTHOR_DATE +} + +function write_lines () { + printf "%s\n" "$@" +} + +function seq () { + case $# in + 1) set 1 "$@" ;; + 2) ;; + *) { echo "need 1 or 2 parameters: or " 1>&2 && exit 2; } ;; + esac + local seq_counter=$1 + while test "$seq_counter" -le "$2" + do + echo "$seq_counter" + seq_counter=$(( seq_counter + 1 )) + done +} + +function baseline () ( + local dir=${1:?the directory to enter} + local output_name=${2:?the basename of the output of the merge} + local our_committish=${3:?our side from which a commit can be derived} + local their_committish=${4:?Their side from which a commit can be derived} + local opt_deviation_message=${5:-} + local one_side=${6:-} + + cd "$dir" + local our_commit_id + local their_commit_id + + local conflict_style="merge" + if [[ "$output_name" == *-merge ]]; then + conflict_style="merge" + elif [[ "$output_name" == *-diff3 ]]; then + conflict_style="diff3" + fi + + our_commit_id="$(git rev-parse "$our_committish")" + their_commit_id="$(git rev-parse "$their_committish")" + local maybe_expected_tree="$(git rev-parse expected^{tree})" + if [ -z "$opt_deviation_message" ]; then + maybe_expected_tree="expected^{tree}" + fi + + local merge_info="${output_name}.merge-info" + git -c merge.conflictStyle=$conflict_style merge-tree -z --write-tree --allow-unrelated-histories "$our_committish" "$their_committish" > "$merge_info" || : + echo "$dir" "$conflict_style" "$our_commit_id" "$our_committish" "$their_commit_id" "$their_committish" "$merge_info" "$maybe_expected_tree" "$opt_deviation_message" >> ../baseline.cases + + if [[ "$one_side" != "no-reverse" ]]; then + local merge_info="${output_name}-reversed.merge-info" + git -c 
merge.conflictStyle=$conflict_style merge-tree -z --write-tree --allow-unrelated-histories "$their_committish" "$our_committish" > "$merge_info" || : + echo "$dir" "$conflict_style" "$their_commit_id" "$their_committish" "$our_commit_id" "$our_committish" "$merge_info" "$maybe_expected_tree" "$opt_deviation_message" >> ../baseline.cases + fi +) + +git init simple +(cd simple + rm -Rf .git/hooks + write_lines 1 2 3 4 5 >numbers + echo hello >greeting + echo foo >whatever + git add numbers greeting whatever + tick + git commit -m initial + + git branch side1 + git branch side2 + git branch side3 + git branch side4 + + git checkout side1 + write_lines 1 2 3 4 5 6 >numbers + echo hi >greeting + echo bar >whatever + git add numbers greeting whatever + tick + git commit -m modify-stuff + + git checkout side2 + write_lines 0 1 2 3 4 5 >numbers + echo yo >greeting + git rm whatever + mkdir whatever + >whatever/empty + git add numbers greeting whatever/empty + tick + git commit -m other-modifications + + git checkout side3 + git mv numbers sequence + tick + git commit -m rename-numbers + + git checkout side4 + write_lines 0 1 2 3 4 5 >numbers + echo yo >greeting + git add numbers greeting + tick + git commit -m other-content-modifications + + git switch --orphan unrelated + >something-else + git add something-else + tick + git commit -m first-commit + + git checkout -b tweak1 side1 + write_lines zero 1 2 3 4 5 6 >numbers + git add numbers + git mv numbers "Αυτά μου φαίνονται κινέζικα" + git commit -m "Renamed numbers" +) + +git init rename-delete +(cd rename-delete + write_lines 1 2 3 4 5 >foo + mkdir olddir + for i in a b c; do echo $i >olddir/$i; done + git add foo olddir + git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines 1 2 3 4 5 6 >foo + git add foo + git mv olddir newdir + git commit -m "Modify foo, rename olddir to newdir" + + git checkout B + write_lines 1 2 3 4 5 six >foo + git add foo + git mv foo olddir/bar + git commit -m 
"Modify foo & rename foo -> olddir/bar" +) + +git init rename-add +(cd rename-add + write_lines original 1 2 3 4 5 >foo + git add foo + git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines 1 2 3 4 5 >foo + echo "different file" >bar + git add foo bar + git commit -m "Modify foo, add bar" + + git checkout B + write_lines original 1 2 3 4 5 6 >foo + git add foo + git mv foo bar + git commit -m "rename foo to bar" +) + +git init rename-add-symlink +(cd rename-add-symlink + write_lines original 1 2 3 4 5 >foo + git add foo + git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines 1 2 3 4 5 >foo + ln -s foo bar + git add foo bar + git commit -m "Modify foo, add symlink bar" + + git checkout B + write_lines original 1 2 3 4 5 6 >foo + git add foo + git mv foo bar + git commit -m "rename foo to bar" +) + +git init rename-rename-plus-content +(cd rename-rename-plus-content + write_lines 1 2 3 4 5 >foo + git add foo + git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines 1 2 3 4 5 six >foo + git add foo + git mv foo bar + git commit -m "Modify foo + rename to bar" + + git checkout B + write_lines 1 2 3 4 5 6 >foo + git add foo + git mv foo baz + git commit -m "Modify foo + rename to baz" +) + +git init rename-add-delete +( + cd rename-add-delete + echo "original file" >foo + git add foo + git commit -m "original" + + git branch A + git branch B + + git checkout A + git rm foo + echo "different file" >bar + git add bar + git commit -m "Remove foo, add bar" + + git checkout B + git mv foo bar + git commit -m "rename foo to bar" +) + +git init rename-rename-delete-delete +( + cd rename-rename-delete-delete + echo foo >foo + echo bar >bar + git add foo bar + git commit -m O + + git branch A + git branch B + + git checkout A + git mv foo baz + git rm bar + git commit -m "Rename foo, remove bar" + + git checkout B + git mv bar baz + git rm foo + git commit -m "Rename bar, 
remove foo" +) + +git init super-1 +(cd super-1 + seq 11 19 >one + seq 31 39 >three + seq 51 59 >five + git add . + tick + git commit -m "O" + + git branch A + git branch B + + git checkout A + seq 10 19 >one + echo 40 >>three + git add one three + git mv one two + git mv three four + git mv five six + tick + git commit -m "A" + + git checkout B + echo 20 >>one + echo forty >>three + echo 60 >>five + git add one three five + git mv one six + git mv three two + git mv five four + tick + git commit -m "B" +) + +git init super-2 +(cd super-2 + write_lines 1 2 3 4 5 >foo + mkdir olddir + for i in a b c; do echo $i >olddir/$i || exit 1; done + git add foo olddir + git commit -m "original" + + git branch A + git branch B + + git checkout A + git rm foo + git mv olddir newdir + mkdir newdir/bar + >newdir/bar/file + git add newdir/bar/file + git commit -m "rm foo, olddir/ -> newdir/, + newdir/bar/file" + + git checkout B + write_lines 1 2 3 4 5 6 >foo + git add foo + git mv foo olddir/bar + git commit -m "Modify foo & rename foo -> olddir/bar" +) + +git init rename-within-rename +(cd rename-within-rename + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + mkdir a/sub && write_lines original 1 2 3 4 5 >a/sub/y.f + touch a/w a/sub/z + git add . 
&& git commit -m "original" + + git branch A + git branch B + git branch expected + + git checkout A + write_lines 1 2 3 4 5 >a/x.f + write_lines 1 2 3 4 5 >a/sub/y.f + git mv a a-renamed + git commit -am "changed all content, renamed a -> a-renamed" + + git checkout B + write_lines original 1 2 3 4 5 6 >a/x.f + write_lines original 1 2 3 4 5 6 >a/sub/y.f + git mv a/sub a/sub-renamed + git commit -am "changed all content, renamed a/sub -> a/sub-renamed" + + git checkout expected + write_lines 1 2 3 4 5 6 >a/x.f + write_lines 1 2 3 4 5 6 >a/sub/y.f + git mv a/sub a/sub-renamed + git mv a a-renamed + git commit -am "tracked both renames, applied all modifications by merge" +) + +git init rename-within-rename-2 +(cd rename-within-rename-2 + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + mkdir a/sub && write_lines original 1 2 3 4 5 >a/sub/y.f + touch a/w a/sub/z + git add . && git commit -m "original" + + git branch A + git branch B + git branch expected + + git checkout A + write_lines 1 2 3 4 5 >a/x.f + write_lines 1 2 3 4 5 >a/sub/y.f + git mv a/sub a/sub-renamed + git mv a a-renamed + git commit -am "changed all content, renamed a -> a-renamed, a/sub -> a/sub-renamed" + + git checkout B + write_lines original 1 2 3 4 5 6 >a/x.f + write_lines original 1 2 3 4 5 6 >a/sub/y.f + git mv a/sub a/sub-renamed + git commit -am "changed all content, renamed a/sub -> a/sub-renamed" + + git checkout expected + write_lines 1 2 3 4 5 6 >a/x.f + write_lines 1 2 3 4 5 6 >a/sub/y.f + git mv a/sub a/sub-renamed + git mv a a-renamed + git commit -am "tracked both renames, applied all modifications by merge" +) + +git init conflicting-rename +(cd conflicting-rename + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + mkdir a/sub && write_lines original 1 2 3 4 5 >a/sub/y.f + touch a/w a/sub/z + git add . 
&& git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines 1 2 3 4 5 >a/x.f + write_lines 1 2 3 4 5 >a/sub/y.f + git mv a a-renamed + git commit -am "changed all content, renamed a -> a-renamed" + + git checkout B + write_lines original 1 2 3 4 5 6 >a/x.f + write_lines original 1 2 3 4 5 6 >a/sub/y.f + git mv a a-different + git commit -am "changed all content, renamed a -> a-different" +) + +git init conflicting-rename-2 +(cd conflicting-rename-2 + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + mkdir a/sub && write_lines original 1 2 3 4 5 >a/sub/y.f + touch a/w a/sub/z + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines 1 2 3 4 5 >a/x.f + write_lines 1 2 3 4 5 >a/sub/y.f + git mv a/sub a/sub-renamed + git commit -am "changed all content, renamed a/sub -> a/sub-renamed" + + git checkout B + write_lines original 1 2 3 4 5 6 >a/x.f + write_lines original 1 2 3 4 5 6 >a/sub/y.f + git mv a/sub a/sub-different + git commit -am "changed all content, renamed a/sub -> a/sub-different" +) + +git init conflicting-rename-complex +(cd conflicting-rename-complex + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + mkdir a/sub && write_lines original 1 2 3 4 5 >a/sub/y.f + touch a/w a/sub/z + git add . && git commit -m "original" + + git branch A + git branch B + git branch expected + + git checkout A + write_lines 1 2 3 4 5 >a/x.f + write_lines 1 2 3 4 5 >a/sub/y.f + git mv a a-renamed + git commit -am "changed all content, renamed a -> a-renamed" + + git checkout B + write_lines original 1 2 3 4 5 6 >a/sub/y.f + git mv a/sub tmp + git rm -r a + git mv tmp a + git commit -am "change something in subdirectory, then overwrite directory with subdirectory" + + git checkout expected + rm .git/index + rm -Rf ./a + mkdir -p a-renamed/sub + write_lines 1 2 3 4 5 >a-renamed/sub/y.f + write_lines 1 2 3 4 5 6 >a-renamed/y.f + touch a-renamed/z a-renamed/sub/z + git add . 
+ git commit -m "Close to what Git has, but different due to rename tracking (which looses 'a/w', and 'x.f' becomes y.f). But the merge is so 'erroneous' that it's beyond rescue" +) + +git init same-rename-different-mode +(cd same-rename-different-mode + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + touch a/w + git add . && git commit -m "original" + + git branch A + git branch B + git branch expected + + git checkout A + write_lines 1 2 3 4 5 >a/x.f + chmod +x a/x.f + chmod +x a/w + git mv a a-renamed + git commit -am "changed all content, add +x, renamed a -> a-renamed" + + git checkout B + write_lines original 1 2 3 4 5 6 >a/x.f + git mv a a-renamed + git commit -am "changed all content, renamed a -> a-renamed" + + git checkout expected + chmod +x a/x.f + chmod +x a/w + write_lines 1 2 3 4 5 6 >a/x.f + git mv a a-renamed + git commit -am "Git, when branches are reversed, doesn't keep the +x flag on a/w so we specify our own expectation" +) + +git init renamed-symlink-with-conflict +(cd renamed-symlink-with-conflict + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + ln -s a/x.f link + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines 1 2 3 4 5 >a/x.f + git mv link link-renamed + git commit -am "changed a/x.f, renamed link -> link-renamed" + + git checkout B + write_lines original 1 2 3 4 5 6 >a/x.f + git mv link link-different + git commit -am "change content, renamed link -> link-different" +) + +git init added-file-changed-content-and-mode +(cd added-file-changed-content-and-mode + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + git add . && git commit -m "original" + + git branch A + git branch B + git branch expected + + git checkout A + write_lines 1 2 3 4 5 >new + git add . + git commit -m "add 'new' with content A" + + git checkout B + write_lines original 1 2 3 4 5 6 >new + chmod +x new + git add . 
+ git commit -m "add new with content B and +x" + + git checkout expected + echo -n $'<<<<<<< A\n1\n2\n3\n4\n5\n=======\noriginal\n1\n2\n3\n4\n5\n6\n>>>>>>> B\n' >new + chmod +x new + git add new + git commit -m "Git has a better merge here, but that's due to better hunk handling/hunk splitting. We, however, consistently use +x" +) + +git init type-change-and-renamed +(cd type-change-and-renamed + mkdir a && >a/x.f + ln -s a/x.f link + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + rm link && echo not-link > link + git commit -am "link type-changed, file changed" + + git checkout B + git mv link link-renamed + git commit -am "just renamed the link" +) + +git init change-and-delete +(cd change-and-delete + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + ln -s a/x.f link + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines 1 2 3 4 5 6 >a/x.f + rm link && echo not-link > link + git commit -am "link type-changed, file changed" + + git checkout B + git rm link a/x.f + git commit -am "delete everything" +) + +git init submodule-both-modify +(cd submodule-both-modify + mkdir sub + (cd sub + git init + echo original > file + git add file + tick + git commit -m sub-root + ) + git add sub + tick + git commit -m root + + git branch expected + + git checkout -b A main + (cd sub + echo A > file + git add file + tick + git commit -m sub-a + ) + git add sub + tick + git commit -m a + + git checkout -b B main + (cd sub + echo B > file + git add file + tick + git commit -m sub-b + ) + git add sub + tick + git commit -m b +) + +git init both-modify-union-attr +(cd both-modify-union-attr + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + echo "a/* merge=union" >.gitattributes + git add . 
&& git commit -m "original" + + git branch A + git branch B + + git checkout A + write_lines A 1 2 3 4 5 6 >a/x.f + git commit -am "change file" + + git checkout B + write_lines B 1 2 3 4 5 7 >a/x.f + git commit -am "change file differently" +) + +git init both-modify-binary +(cd both-modify-binary + mkdir a && printf '\x00 binary' >a/x.f + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + printf '\x00 A' >a/x.f + git commit -am "change binary file" + + git checkout B + printf '\x00 B' >a/x.f + git commit -am "change binary file differently" +) + +git init both-modify-file-with-binary-attr +(cd both-modify-file-with-binary-attr + mkdir a && echo 'not binary' >a/x.f + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + echo 'A binary' >a/x.f + git commit -am "change pseudo-binary file" + + git checkout B + echo 'B binary' >a/x.f + git commit -am "change pseudo-binary file differently" +) + +git init big-file-merge +(cd big-file-merge + git config --local core.bigFileThreshold 80 + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + seq 30 >a/x.f + git commit -am "turn normal file into big one (81 bytes)" + git branch expected + + git checkout B + write_lines 1 2 3 4 5 6 >a/x.f + git commit -am "a normal but conflicting file change" +) + +git init no-merge-base +(cd no-merge-base + git checkout -b A + echo "A" >content && git add . && git commit -m "content A" + + git checkout --orphan B + echo "B" >content && git add . && git commit -m "content B" + + git checkout -b expectation +) + +git init multiple-merge-bases +(cd multiple-merge-bases + write_lines 1 2 3 4 5 >content + git add . 
&& git commit -m "initial" + + git branch A + git branch B + + git checkout A + write_lines 0 1 2 3 4 5 >content + git commit -am "change in A" && git tag A1 + + git checkout B + write_lines 1 2 3 4 5 6 >content + git commit -am "change in B" && git tag B1 + + git checkout A + git merge B1 + + git checkout B + git merge A1 + + git checkout A + write_lines 0 1 2 3 4 5 A >content + git commit -am "conflicting in A" + + git checkout B + git rm content + write_lines 0 2 3 4 5 six >renamed + git commit -m "rename in B" +) + +git init rename-and-modification +(cd rename-and-modification + mkdir a && write_lines original 1 2 3 4 5 >a/x.f + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + git mv a/x.f x.f + git commit -am "move a/x.f to the top-level" + + git checkout B + write_lines 1 2 3 4 5 6 >a/x.f + git commit -am "changed a/x.f" +) + +git init symlink-modification +(cd symlink-modification + touch a b o + ln -s o link + git add . && git commit -m "original" + + git branch A + git branch B + + git checkout A + rm link && ln -s a link + git commit -am "set link to point to 'a'" + + git checkout B + rm link && ln -s b link + git commit -am "set link to point to 'b'" +) + +git init symlink-addition +(cd symlink-addition + touch a b + git add . && git commit -m "original without symlink" + + git branch A + git branch B + + git checkout A + ln -s a link && git add . + git commit -m "new link to point to 'a'" + + git checkout B + ln -s b link && git add . + git commit -m "new link to point to 'b'" +) + +git init type-change-to-symlink +(cd type-change-to-symlink + touch a b link + git add . && git commit -m "original without symlink" + + git branch A + git branch B + + git checkout A + git rm link + ln -s a link && git add . + git commit -m "new link to point to 'a'" + + git checkout B + git rm link + ln -s b link && git add . 
+ git commit -m "new link to point to 'b'" +) + + + +baseline simple side-1-3-without-conflict side1 side3 +baseline simple fast-forward side1 main +baseline simple no-change main main +baseline simple side-1-3-without-conflict-diff3 side1 side3 +baseline simple side-1-2-various-conflicts side1 side2 +baseline simple side-1-2-various-conflicts-diff3 side1 side2 +baseline simple single-content-conflict side1 side4 +baseline simple single-content-conflict-diff3 side1 side4 +baseline simple tweak1-side2 tweak1 side2 +baseline simple tweak1-side2-diff3 tweak1 side2 +baseline simple side-1-unrelated side1 unrelated +baseline simple side-1-unrelated-diff3 side1 unrelated +baseline rename-delete A-B A B +baseline rename-delete A-similar A A +baseline rename-delete B-similar B B +baseline rename-add A-B A B +baseline rename-add A-B-diff3 A B +baseline rename-add-symlink A-B A B +baseline rename-add-symlink A-B-diff3 A B +baseline rename-rename-plus-content A-B A B +baseline rename-rename-plus-content A-B-diff3 A B +baseline rename-add-delete A-B A B +baseline rename-rename-delete-delete A-B A B +baseline super-1 A-B A B +baseline super-1 A-B-diff3 A B +baseline super-2 A-B A B +baseline super-2 A-B-diff3 A B + +baseline rename-within-rename A-B-deviates A B "Git doesn't detect the rename-nesting so there is duplication - we achieve the optimal result" +baseline rename-within-rename-2 A-B-deviates A B "TBD: Right, something is different documentation was forgotten :/" +baseline conflicting-rename A-B A B +baseline conflicting-rename-2 A-B A B +baseline conflicting-rename-complex A-B A B "Git has different rename tracking which is why a-renamed/w disappears - it's still close enough" + +baseline same-rename-different-mode A-B A B "Git works for the A/B case, but for B/A it forgets to set the executable bit" +baseline same-rename-different-mode A-B-diff3 A B "Git works for the A/B case, but for B/A it forgets to set the executable bit" +baseline renamed-symlink-with-conflict 
A-B A B +baseline added-file-changed-content-and-mode A-B A B "We improve on executable bit handling, but loose on diff quality as we are definitely missing some tweaks" + +baseline type-change-and-renamed A-B A B +baseline change-and-delete A-B A B +baseline submodule-both-modify A-B A B "We can't handle submodules yet and just mark them as conflicting. This is planned to be improved." +baseline both-modify-union-attr A-B A B +baseline both-modify-union-attr A-B-diff3 A B +baseline both-modify-binary A-B A B +baseline both-modify-binary A-B A B +baseline both-modify-file-with-binary-attr A-B A B +baseline big-file-merge A-B A B "Git actually ignores core.bigFileThreshold during merging and tries a normal merge (or binary one) anyway. We don't ignore it and treat big files like binary files" \ + no-reverse +baseline no-merge-base A-B A B +baseline no-merge-base A-B-diff3 A B + +baseline multiple-merge-bases A-B A B +baseline multiple-merge-bases A-B-diff3 A B + +baseline rename-and-modification A-B A B +baseline symlink-modification A-B A B +baseline symlink-addition A-B A B +baseline type-change-to-symlink A-B A B diff --git a/gix-merge/tests/merge/blob/builtin_driver.rs b/gix-merge/tests/merge/blob/builtin_driver.rs index b0d7afa8f85..fd08c0f8140 100644 --- a/gix-merge/tests/merge/blob/builtin_driver.rs +++ b/gix-merge/tests/merge/blob/builtin_driver.rs @@ -25,7 +25,8 @@ fn binary() { mod text { use bstr::ByteSlice; - use gix_merge::blob::Resolution; + use gix_merge::blob::builtin_driver::text::Conflict; + use gix_merge::blob::{builtin_driver, Resolution}; use pretty_assertions::assert_str_eq; const DIVERGING: &[&str] = &[ @@ -71,60 +72,122 @@ mod text { "complex/spurious-c-conflicts/zdiff3-histogram.merged", ]; + /// Should be a copy of `DIVERGING` once the reverse operation truly works like before + const DIVERGING_REVERSED: &[&str] = &[ + // expected cases + "zdiff3-middlecommon/merge.merged-reversed", + "zdiff3-middlecommon/merge-union.merged-reversed", + 
"zdiff3-interesting/merge.merged-reversed", + "zdiff3-interesting/merge-theirs.merged-reversed", + "zdiff3-interesting/diff3.merged-reversed", + "zdiff3-interesting/diff3-histogram.merged-reversed", + "zdiff3-interesting/zdiff3.merged-reversed", + "zdiff3-interesting/zdiff3-histogram.merged-reversed", + "zdiff3-interesting/merge-union.merged-reversed", + "zdiff3-evil/merge.merged-reversed", + "zdiff3-evil/merge-union.merged-reversed", + "complex/missing-LF-at-EOF/merge.merged-reversed", + "complex/missing-LF-at-EOF/diff3.merged-reversed", + "complex/missing-LF-at-EOF/diff3-histogram.merged-reversed", + "complex/missing-LF-at-EOF/zdiff3.merged-reversed", + "complex/missing-LF-at-EOF/zdiff3-histogram.merged-reversed", + "complex/missing-LF-at-EOF/merge-ours.merged-reversed", + "complex/missing-LF-at-EOF/merge-theirs.merged-reversed", + "complex/missing-LF-at-EOF/merge-union.merged-reversed", + "complex/auto-simplification/merge.merged-reversed", + "complex/auto-simplification/merge-union.merged-reversed", + "complex/marker-newline-handling-lf2/zdiff3.merged-reversed", + "complex/marker-newline-handling-lf2/zdiff3-histogram.merged-reversed", + "complex/spurious-c-conflicts/merge.merged-reversed", + "complex/spurious-c-conflicts/merge-union.merged-reversed", + "complex/spurious-c-conflicts/diff3-histogram.merged-reversed", + "complex/spurious-c-conflicts/zdiff3-histogram.merged-reversed", + ]; + // TODO: fix all of these eventually - fn is_case_diverging(case: &baseline::Expectation) -> bool { - DIVERGING.iter().any(|name| case.name == *name) + fn is_case_diverging(case: &baseline::Expectation, diverging: &[&str]) -> bool { + diverging.iter().any(|name| case.name == *name) + } + + #[test] + fn fuzzed() { + for (ours, base, theirs, opts) in [ + ( + &[255, 10, 10, 255][..], + &[0, 10, 10, 13, 10, 193, 0, 51, 8, 33][..], + &[10, 255, 10, 10, 10, 0, 10][..], + builtin_driver::text::Options { + conflict: Conflict::ResolveWithUnion, + diff_algorithm: 
imara_diff::Algorithm::Myers, + }, + ), + ( + &[], + &[10, 255, 255, 255], + &[255, 10, 255, 10, 10, 255, 40], + builtin_driver::text::Options::default(), + ), + ] { + let mut out = Vec::new(); + let mut input = imara_diff::intern::InternedInput::default(); + gix_merge::blob::builtin_driver::text(&mut out, &mut input, Default::default(), ours, base, theirs, opts); + } } #[test] fn run_baseline() -> crate::Result { let root = gix_testtools::scripted_fixture_read_only("text-baseline.sh")?; - let cases = std::fs::read_to_string(root.join("baseline.cases"))?; - let mut out = Vec::new(); - let mut num_diverging = 0; - let mut num_cases = 0; - for case in baseline::Expectations::new(&root, &cases) { - num_cases += 1; - let mut input = imara_diff::intern::InternedInput::default(); - let actual = gix_merge::blob::builtin_driver::text( - &mut out, - &mut input, - case.labels(), - &case.ours, - &case.base, - &case.theirs, - case.options, - ); - if is_case_diverging(&case) { - num_diverging += 1; - } else { - let expected_resolution = if case.expected.contains_str("<<<<<<<") { - Resolution::Conflict - } else { - Resolution::Complete - }; - assert_eq!(out.as_bstr(), case.expected); - assert_str_eq!( - out.as_bstr().to_str_lossy(), - case.expected.to_str_lossy(), - "{}: output mismatch\n{}", - case.name, - out.as_bstr() + for (baseline, diverging, expected_percentage) in [ + ("baseline-reversed.cases", DIVERGING_REVERSED, 11), + ("baseline.cases", DIVERGING, 11), + ] { + let cases = std::fs::read_to_string(root.join(baseline))?; + let mut out = Vec::new(); + let mut num_diverging = 0; + let mut num_cases = 0; + for case in baseline::Expectations::new(&root, &cases) { + num_cases += 1; + let mut input = imara_diff::intern::InternedInput::default(); + let actual = gix_merge::blob::builtin_driver::text( + &mut out, + &mut input, + case.labels(), + &case.ours, + &case.base, + &case.theirs, + case.options, ); - assert_eq!(actual, expected_resolution, "{}: resolution mismatch", 
case.name,); + if is_case_diverging(&case, diverging) { + num_diverging += 1; + } else { + let expected_resolution = if case.expected.contains_str("<<<<<<<") { + Resolution::Conflict + } else { + Resolution::Complete + }; + assert_str_eq!( + out.as_bstr().to_str_lossy(), + case.expected.to_str_lossy(), + "{}: output mismatch\n{}", + case.name, + out.as_bstr() + ); + assert_eq!(out.as_bstr(), case.expected); + assert_eq!(actual, expected_resolution, "{}: resolution mismatch", case.name,); + } } - } - assert_eq!( - num_diverging, - DIVERGING.len(), - "Number of expected diverging cases must match the actual one - probably the implementation improved" - ); - assert_eq!( - ((num_diverging as f32 / num_cases as f32) * 100.0) as usize, - 11, - "Just to show the percentage of skipped tests - this should get better" - ); + assert_eq!( + num_diverging, + diverging.len(), + "Number of expected diverging cases must match the actual one - probably the implementation improved" + ); + assert_eq!( + ((num_diverging as f32 / num_cases as f32) * 100.0) as usize, + expected_percentage, + "Just to show the percentage of skipped tests - this should get better" + ); + } Ok(()) } @@ -185,15 +248,16 @@ mod text { let read = |rela_path: &str| read_blob(self.root, rela_path); let mut options = gix_merge::blob::builtin_driver::text::Options::default(); + let marker_size = 7.try_into().unwrap(); for arg in words { options.conflict = match arg { "--diff3" => Conflict::Keep { style: ConflictStyle::Diff3, - marker_size: 7, + marker_size, }, "--zdiff3" => Conflict::Keep { style: ConflictStyle::ZealousDiff3, - marker_size: 7, + marker_size, }, "--ours" => Conflict::ResolveWithOurs, "--theirs" => Conflict::ResolveWithTheirs, diff --git a/gix-merge/tests/merge/blob/platform.rs b/gix-merge/tests/merge/blob/platform.rs index d03a3ddc960..b9df80a8f21 100644 --- a/gix-merge/tests/merge/blob/platform.rs +++ b/gix-merge/tests/merge/blob/platform.rs @@ -5,12 +5,14 @@ use gix_merge::blob::Platform; mod 
merge { use crate::blob::platform::new_platform; use crate::blob::util::ObjectDb; + use crate::hex_to_id; use bstr::{BStr, ByteSlice}; use gix_merge::blob::builtin_driver::text::ConflictStyle; use gix_merge::blob::platform::builtin_merge::Pick; use gix_merge::blob::platform::DriverChoice; use gix_merge::blob::{builtin_driver, pipeline, platform, BuiltinDriver, Resolution, ResourceKind}; use gix_object::tree::EntryKind; + use std::convert::Infallible; use std::process::Stdio; #[test] @@ -46,7 +48,7 @@ mod merge { ); let mut buf = Vec::new(); - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!( res, (Pick::Ours, Resolution::Conflict), @@ -54,7 +56,7 @@ mod merge { ); platform_ref.options.resolve_binary_with = Some(builtin_driver::binary::ResolveWith::Theirs); - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!( res, (Pick::Theirs, Resolution::Complete), @@ -66,8 +68,9 @@ mod merge { #[test] fn builtin_with_conflict() -> crate::Result { let mut platform = new_platform(None, pipeline::Mode::ToGit); + let non_existing_ancestor_id = hex_to_id("ffffffffffffffffffffffffffffffffffffffff"); platform.set_resource( - gix_hash::Kind::Sha1.null(), + non_existing_ancestor_id, EntryKind::Blob, "b".into(), ResourceKind::CommonAncestorOrBase, @@ -86,7 +89,7 @@ mod merge { let mut platform_ref = platform.prepare_merge(&db, Default::default())?; assert_eq!(platform_ref.driver, DriverChoice::BuiltIn(BuiltinDriver::Text)); let mut buf = Vec::new(); - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!(res, (Pick::Buffer, Resolution::Conflict)); assert_eq!( buf.as_bstr(), @@ -100,9 +103,9 @@ theirs ); 
platform_ref.options.text.conflict = builtin_driver::text::Conflict::Keep { style: ConflictStyle::Diff3, - marker_size: 3, + marker_size: 3.try_into().unwrap(), }; - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!(res, (Pick::Buffer, Resolution::Conflict)); assert_eq!( @@ -119,7 +122,7 @@ theirs ); platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithOurs; - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!( res, (Pick::Buffer, Resolution::Complete), @@ -128,23 +131,23 @@ theirs assert_eq!(buf.as_bstr(), "ours"); platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithTheirs; - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!(res, (Pick::Buffer, Resolution::Complete)); assert_eq!(buf.as_bstr(), "theirs"); platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithUnion; - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!(res, (Pick::Buffer, Resolution::Complete)); assert_eq!(buf.as_bstr(), "ours\ntheirs"); platform_ref.driver = DriverChoice::BuiltIn(BuiltinDriver::Union); platform_ref.options.text.conflict = builtin_driver::text::Conflict::default(); - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!(res, (Pick::Buffer, Resolution::Complete)); assert_eq!(buf.as_bstr(), "ours\ntheirs"); platform_ref.driver = DriverChoice::BuiltIn(BuiltinDriver::Binary); - let res = platform_ref.merge(&mut buf, 
default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!( res, (Pick::Ours, Resolution::Conflict), @@ -152,20 +155,56 @@ theirs ); assert!(buf.is_empty(), "it tells us where to get the content from"); assert_eq!( - platform_ref.buffer_by_pick(res.0).unwrap().as_bstr(), + platform_ref.buffer_by_pick(res.0).unwrap().unwrap().as_bstr(), "ours", "getting access to the content is simplified" ); + assert_eq!( + platform_ref + .id_by_pick(res.0, &buf, |_buf| -> Result<_, Infallible> { + panic!("no need to write buffer") + }) + .unwrap() + .unwrap(), + hex_to_id("424860eef4edb9f5a2dacbbd6dc8c2d2e7645035"), + "there is no need to write a buffer here, it just returns one of our inputs" + ); - for (expected, expected_pick, resolve) in [ - ("ours", Pick::Ours, builtin_driver::binary::ResolveWith::Ours), - ("theirs", Pick::Theirs, builtin_driver::binary::ResolveWith::Theirs), - ("b\n", Pick::Ancestor, builtin_driver::binary::ResolveWith::Ancestor), + for (expected, expected_pick, resolve, expected_id) in [ + ( + "ours", + Pick::Ours, + builtin_driver::binary::ResolveWith::Ours, + hex_to_id("424860eef4edb9f5a2dacbbd6dc8c2d2e7645035"), + ), + ( + "theirs", + Pick::Theirs, + builtin_driver::binary::ResolveWith::Theirs, + hex_to_id("228068dbe790983c15535164cd483eb77ade97e4"), + ), + ( + "b\n", + Pick::Ancestor, + builtin_driver::binary::ResolveWith::Ancestor, + non_existing_ancestor_id, + ), ] { platform_ref.options.resolve_binary_with = Some(resolve); - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!(res, (expected_pick, Resolution::Complete)); - assert_eq!(platform_ref.buffer_by_pick(res.0).unwrap().as_bstr(), expected); + assert_eq!(platform_ref.buffer_by_pick(res.0).unwrap().unwrap().as_bstr(), expected); + + assert_eq!( + platform_ref + .id_by_pick(res.0, &buf, |_buf| -> Result<_, 
Infallible> { + panic!("no need to write buffer") + }) + .unwrap() + .unwrap(), + expected_id, + "{expected}: each input has an id, so it's just returned as is without handling buffers" + ); } Ok(()) @@ -202,7 +241,7 @@ theirs let platform_ref = platform.prepare_merge(&db, Default::default())?; let mut buf = Vec::new(); - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!(res, (Pick::Buffer, Resolution::Complete), "merge drivers always merge "); let mut lines = cleaned_driver_lines(&buf)?; for tmp_file in lines.by_ref().take(3) { @@ -228,12 +267,24 @@ theirs let id = db.insert("binary\0"); platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::OtherOrTheirs, &db)?; let platform_ref = platform.prepare_merge(&db, Default::default())?; - let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), &Default::default())?; assert_eq!( res, (Pick::Buffer, Resolution::Complete), "merge drivers deal with binary themselves" ); + assert_eq!( + platform_ref.buffer_by_pick(res.0), + Ok(None), + "This indicates the buffer must be read" + ); + + let marker = hex_to_id("ffffffffffffffffffffffffffffffffffffffff"); + assert_eq!( + platform_ref.id_by_pick(res.0, &buf, |_buf| Ok::<_, Infallible>(marker)), + Ok(Some(marker)), + "the id is created by hashing the merge buffer" + ); let mut lines = cleaned_driver_lines(&buf)?; for tmp_file in lines.by_ref().take(3) { assert!(tmp_file.contains_str(&b".tmp"[..]), "{tmp_file}"); @@ -282,7 +333,7 @@ theirs let platform_ref = platform.prepare_merge(&gix_object::find::Never, Default::default())?; let mut buf = Vec::new(); - let res = platform_ref.merge(&mut buf, Default::default(), Default::default())?; + let res = platform_ref.merge(&mut buf, Default::default(), &Default::default())?; assert_eq!( res, (Pick::Buffer, 
Resolution::Complete), @@ -295,11 +346,7 @@ theirs let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]); let res = platform_ref.builtin_merge(BuiltinDriver::Text, &mut buf, &mut input, Default::default()); - assert_eq!( - res, - Some((Pick::Buffer, Resolution::Complete)), - "both versions are deleted" - ); + assert_eq!(res, (Pick::Buffer, Resolution::Complete), "both versions are deleted"); assert!(buf.is_empty(), "the result is the same on direct invocation"); let print_all = "for arg in $@ %O %A %B %L %P %S %X %Y %F; do echo $arg; done"; @@ -358,24 +405,27 @@ theirs assert_eq!(platform_ref.other.data, platform::resource::Data::TooLarge { size: 12 }); let mut out = Vec::new(); - let err = platform_ref - .merge(&mut out, Default::default(), Default::default()) - .unwrap_err(); - assert!(matches!(err, platform::merge::Error::ResourceTooLarge)); + let res = platform_ref.merge(&mut out, Default::default(), &Default::default())?; + assert_eq!( + res, + (Pick::Ours, Resolution::Conflict), + "this is the default for binary merges, which are used in this case" + ); let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]); assert_eq!( platform_ref.builtin_merge(BuiltinDriver::Text, &mut out, &mut input, Default::default(),), - None + res, + "we can't enforce it, it will just default to using binary" ); let err = platform_ref .prepare_external_driver("bogus".into(), Default::default(), Default::default()) .unwrap_err(); - assert!(matches!( - err, - platform::prepare_external_driver::Error::ResourceTooLarge { .. } - )); + assert!( + matches!(err, platform::prepare_external_driver::Error::ResourceTooLarge { .. 
}), + "however, for external drivers, resources can still be too much to handle, until we learn how to stream them" + ); Ok(()) } @@ -464,7 +514,7 @@ mod prepare_merge { platform.set_resource( gix_hash::Kind::Sha1.null(), EntryKind::Blob, - "just-set".into(), + "ancestor does not matter for attributes".into(), ResourceKind::CommonAncestorOrBase, &gix_object::find::Never, )?; @@ -472,7 +522,7 @@ mod prepare_merge { platform.set_resource( gix_hash::Kind::Sha1.null(), EntryKind::Blob, - "does not matter for driver".into(), + "just-set".into(), ResourceKind::CurrentOrOurs, &gix_object::find::Never, )?; @@ -490,6 +540,11 @@ mod prepare_merge { DriverChoice::BuiltIn(BuiltinDriver::Text), "`merge` attribute means text" ); + assert_eq!( + prepared.options.text.conflict.marker_size(), + Some(32), + "marker sizes are picked up from attributes as well" + ); platform.set_resource( gix_hash::Kind::Sha1.null(), diff --git a/gix-merge/tests/merge/main.rs b/gix-merge/tests/merge/main.rs index 9f7a6989d2c..d1887c1c705 100644 --- a/gix-merge/tests/merge/main.rs +++ b/gix-merge/tests/merge/main.rs @@ -1,6 +1,11 @@ +use gix_hash::ObjectId; extern crate core; -#[cfg(feature = "blob")] mod blob; +mod tree; pub use gix_testtools::Result; + +fn hex_to_id(hex: &str) -> ObjectId { + ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex") +} diff --git a/gix-merge/tests/merge/tree/baseline.rs b/gix-merge/tests/merge/tree/baseline.rs new file mode 100644 index 00000000000..18ba8aa0f06 --- /dev/null +++ b/gix-merge/tests/merge/tree/baseline.rs @@ -0,0 +1,344 @@ +use bstr::{BStr, ByteSlice}; +use gix_hash::ObjectId; +use gix_merge::blob::builtin_driver::text::ConflictStyle; +use gix_object::tree::EntryMode; +use gix_object::FindExt; +use std::path::{Path, PathBuf}; + +/// An entry in the conflict +#[derive(Debug)] +pub struct Entry { + /// The relative path in the repository + pub location: String, + /// The content id. + pub id: gix_hash::ObjectId, + /// The kind of entry. 
+ pub mode: EntryMode, +} + +/// Keep track of all the sides of a conflict. Some might not be set to indicate removal, including the ancestor. +#[derive(Default, Debug)] +pub struct Conflict { + pub ancestor: Option, + pub ours: Option, + pub theirs: Option, +} + +#[derive(Debug)] +pub enum ConflictKind { + /// The conflict was resolved by automatically merging the content. + AutoMerging, + /// The content could not be resolved so it's conflicting. + ConflictContents, + /// Directory in theirs in the way of our file. + ConflictDirectoryBlocksFile, + /// Modified in ours but deleted in theirs. + ConflictModifyDelete, + /// Modified in ours but parent directory renamed in theirs. + DirectoryRenamedWithModificationInside, + /// Added files differ in mode. + DistinctModes, + /// The same file was renamed to different destinations. + RenameRename, + /// Deleted in ours with a new file added, renamed to new file in theirs with original content. + RenameAddDelete, + /// Two binary files were changed in different ways, which can never be merged (without a merge-driver) + Binary, +} + +/// More loosely structured information about the `Conflict`. +#[derive(Debug)] +pub struct ConflictInfo { + /// All the paths involved in the informational message + pub paths: Vec, + /// The type of the conflict, further described in `message`. 
+ pub kind: ConflictKind, + /// An arbitrary message formed from paths and kind + pub message: String, +} + +impl Conflict { + fn any_location(&self) -> Option<&str> { + self.ancestor + .as_ref() + .or(self.ours.as_ref()) + .or(self.theirs.as_ref()) + .map(|a| a.location.as_str()) + } + fn storage_for(&mut self, side: Side, location: &str) -> Option<&mut Option> { + let current_location = self.any_location(); + let location_is_same = current_location.is_none() || current_location == Some(location); + let side = match side { + Side::Ancestor => &mut self.ancestor, + Side::Ours => &mut self.ours, + Side::Theirs => &mut self.theirs, + }; + (!side.is_some() && location_is_same).then_some(side) + } +} + +pub struct MergeInfo { + /// The hash of the merged tree - it may contain intermediate files if the merge didn't succeed entirely. + pub merged_tree: gix_hash::ObjectId, + /// If there were conflicts, this is the conflicting paths. + pub conflicts: Option>, + /// Structured details which to some extent can be compared to our own conflict information. + pub information: Vec, +} + +pub struct Expectation { + pub root: PathBuf, + pub conflict_style: gix_merge::blob::builtin_driver::text::ConflictStyle, + pub odb: gix_odb::memory::Proxy, + pub our_commit_id: gix_hash::ObjectId, + pub our_side_name: String, + pub their_commit_id: gix_hash::ObjectId, + pub their_side_name: String, + pub merge_info: MergeInfo, + pub case_name: String, + pub deviation: Option, +} + +/// Git doesn't provide the same result. +pub struct Deviation { + /// Tells us the reason for expecting a difference compared to the Git result. + pub message: String, + /// The tree we wish to see, it's hand-crafted directly in the test as Git can't provide the baseline here. 
+ pub expected_tree_id: gix_hash::ObjectId, +} + +pub struct Expectations<'a> { + root: &'a Path, + lines: std::str::Lines<'a>, +} + +impl<'a> Expectations<'a> { + pub fn new(root: &'a Path, cases: &'a str) -> Self { + Expectations { + root, + lines: cases.lines(), + } + } +} + +impl Iterator for Expectations<'_> { + type Item = Expectation; + + fn next(&mut self) -> Option { + let line = self.lines.next()?; + let mut tokens = line.split(' '); + let ( + Some(subdir), + Some(conflict_style), + Some(our_commit_id), + Some(our_side_name), + Some(their_commit_id), + Some(their_side_name), + Some(merge_info_filename), + Some(expected_custom_tree), + ) = ( + tokens.next(), + tokens.next(), + tokens.next(), + tokens.next(), + tokens.next(), + tokens.next(), + tokens.next(), + tokens.next(), + ) + else { + unreachable!("invalid line: {line:?}") + }; + let deviation = (expected_custom_tree != "expected^{tree}").then(|| { + let expected_tree_id = gix_hash::ObjectId::from_hex(expected_custom_tree.as_bytes()) + .expect("valid tree id in hex for the expected tree"); + let message = tokens.collect::>().join(" ").trim().to_owned(); + Deviation { + message, + expected_tree_id, + } + }); + + let subdir_path = self.root.join(subdir); + let conflict_style = match conflict_style { + "merge" => ConflictStyle::Merge, + "diff3" => ConflictStyle::Diff3, + unknown => unreachable!("Unknown conflict style: '{unknown}'"), + }; + let odb = gix_odb::at(subdir_path.join(".git/objects")).expect("object dir exists"); + let objects = gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1); + let our_commit_id = gix_hash::ObjectId::from_hex(our_commit_id.as_bytes()).unwrap(); + let their_commit_id = gix_hash::ObjectId::from_hex(their_commit_id.as_bytes()).unwrap(); + let merge_info = parse_merge_info(std::fs::read_to_string(subdir_path.join(merge_info_filename)).unwrap()); + Some(Expectation { + root: subdir_path, + conflict_style, + odb: objects, + our_commit_id, + our_side_name: 
our_side_name.to_owned(), + their_commit_id, + their_side_name: their_side_name.to_owned(), + merge_info, + case_name: format!( + "{subdir}-{}", + merge_info_filename + .split('.') + .next() + .expect("extension after single dot") + ), + deviation, + }) + } +} + +fn parse_merge_info(content: String) -> MergeInfo { + let mut lines = content.split('\0').filter(|t| !t.is_empty()).peekable(); + let tree_id = gix_hash::ObjectId::from_hex(lines.next().unwrap().as_bytes()).unwrap(); + let mut out = MergeInfo { + merged_tree: tree_id, + conflicts: None, + information: Vec::new(), + }; + + let mut conflicts = Vec::new(); + let mut conflict = Conflict::default(); + while let Some(line) = lines.peek() { + let (entry, side) = match parse_conflict_file_info(line) { + Some(t) => t, + None => break, + }; + lines.next(); + let field = match conflict.storage_for(side, &entry.location) { + None => { + conflicts.push(conflict); + conflict = Conflict::default(); + conflict + .storage_for(side, &entry.location) + .expect("always available for new side") + } + Some(field) => field, + }; + *field = Some(entry); + } + + while lines.peek().is_some() { + out.information + .push(parse_info(&mut lines).expect("if there are lines, it should be valid info")); + } + assert_eq!(lines.next(), None, "TODO: conflict messages"); + out.conflicts = (!conflicts.is_empty()).then_some(conflicts); + out +} + +#[derive(Copy, Clone)] +enum Side { + Ancestor, + Ours, + Theirs, +} + +fn parse_conflict_file_info(line: &str) -> Option<(Entry, Side)> { + let (info, mut path) = line.split_at(line.find('\t')?); + path = &path[1..]; + let mut tokens = info.split(' '); + let (oct_mode, hex_id, stage) = ( + tokens.next().expect("mode"), + tokens.next().expect("id"), + tokens.next().expect("stage"), + ); + assert_eq!( + tokens.next(), + None, + "info line not understood, expected three fields only" + ); + Some(( + Entry { + location: path.to_owned(), + id: gix_hash::ObjectId::from_hex(hex_id.as_bytes()).unwrap(), + 
mode: EntryMode(gix_utils::btoi::to_signed_with_radix::(oct_mode.as_bytes(), 8).unwrap() as u16), + }, + match stage { + "1" => Side::Ancestor, + "2" => Side::Ours, + "3" => Side::Theirs, + invalid => panic!("{invalid} is an unexpected side"), + }, + )) +} + +fn parse_info<'a>(mut lines: impl Iterator) -> Option { + let num_paths: usize = lines.next()?.parse().ok()?; + let paths: Vec<_> = lines.by_ref().take(num_paths).map(ToOwned::to_owned).collect(); + let kind = match lines.next()? { + "Auto-merging" => ConflictKind::AutoMerging, + "CONFLICT (contents)" => ConflictKind::ConflictContents, + "CONFLICT (file/directory)" => ConflictKind::ConflictDirectoryBlocksFile, + "CONFLICT (modify/delete)" => ConflictKind::ConflictModifyDelete, + "CONFLICT (directory rename suggested)" => ConflictKind::DirectoryRenamedWithModificationInside, + "CONFLICT (distinct modes)" => ConflictKind::DistinctModes, + "CONFLICT (rename/rename)" => ConflictKind::RenameRename, + "CONFLICT (rename/delete)" => ConflictKind::RenameAddDelete, + "CONFLICT (binary)" => ConflictKind::Binary, + conflict_type => panic!("Unkonwn conflict type: {conflict_type}"), + }; + let message = lines.next()?.to_owned(); + Some(ConflictInfo { paths, kind, message }) +} + +pub fn visualize_tree( + id: &gix_hash::oid, + odb: &impl gix_object::Find, + name_and_mode: Option<(&BStr, EntryMode)>, +) -> termtree::Tree { + fn short_id(id: &gix_hash::oid) -> String { + id.to_string()[..7].to_string() + } + let entry_name = |id: &gix_hash::oid, name: Option<(&BStr, EntryMode)>| -> String { + let mut buf = Vec::new(); + match name { + None => short_id(id), + Some((name, mode)) => { + format!( + "{name}:{mode}{} {}", + short_id(id), + match odb.find_blob(id, &mut buf) { + Ok(blob) => format!("{:?}", blob.data.as_bstr()), + Err(_) => "".into(), + }, + mode = if mode.is_tree() { + "".into() + } else { + format!("{:o}:", mode.0) + } + ) + } + } + }; + + let mut tree = termtree::Tree::new(entry_name(id, name_and_mode)); + let mut 
buf = Vec::new(); + for entry in odb.find_tree(id, &mut buf).unwrap().entries { + if entry.mode.is_tree() { + tree.push(visualize_tree(entry.oid, odb, Some((entry.filename, entry.mode)))); + } else { + tree.push(entry_name(entry.oid, Some((entry.filename, entry.mode)))); + } + } + tree +} + +pub fn show_diff_and_fail( + case_name: &str, + actual_id: ObjectId, + actual: &gix_merge::tree::Outcome<'_>, + expected: &MergeInfo, + odb: &gix_odb::memory::Proxy, +) { + pretty_assertions::assert_str_eq!( + visualize_tree(&actual_id, odb, None).to_string(), + visualize_tree(&expected.merged_tree, odb, None).to_string(), + "{case_name}: merged tree mismatch\n{:#?}\n{:#?}\n{case_name}", + actual.conflicts, + expected.information + ); +} diff --git a/gix-merge/tests/merge/tree/mod.rs b/gix-merge/tests/merge/tree/mod.rs new file mode 100644 index 00000000000..833166bc61a --- /dev/null +++ b/gix-merge/tests/merge/tree/mod.rs @@ -0,0 +1,180 @@ +use crate::tree::baseline::Deviation; +use gix_diff::Rewrites; +use gix_merge::commit::Options; +use gix_object::Write; +use gix_worktree::stack::state::attributes; +use std::path::Path; + +/// ### How to add a new baseline test +/// +/// 1. Add it to the `tree_baseline.sh` script and don't forget to call the +/// `baseline` function there with the respective parameters. +/// 2. Run all tests - maybe it works, if so, jump to the last point. +/// 3. Change `let new_test = None` to `… = Some("case-name")` to focus on the +/// newly added test and its reversed version. +/// 4. Make it work, then set the `let new_test = Some(…)` back to `… = None`. +/// 5. Validate that all tests are still working, and adjust the expected number of cases +/// in the assertion that would then fail. 
+#[test] +fn run_baseline() -> crate::Result { + let root = gix_testtools::scripted_fixture_read_only("tree-baseline.sh")?; + let cases = std::fs::read_to_string(root.join("baseline.cases"))?; + let mut actual_cases = 0; + // let new_test = Some("simple-fast-forward"); + let new_test = None; + for baseline::Expectation { + root, + conflict_style, + odb, + our_commit_id, + our_side_name, + their_commit_id, + their_side_name, + merge_info, + case_name, + deviation, + } in baseline::Expectations::new(&root, &cases) + .filter(|case| new_test.map_or(true, |prefix: &str| case.case_name.starts_with(prefix))) + { + actual_cases += 1; + let mut graph = gix_revwalk::Graph::new(&odb, None); + let large_file_threshold_bytes = 100; + let mut blob_merge = new_blob_merge_platform(&root, large_file_threshold_bytes); + let mut diff_resource_cache = new_diff_resource_cache(&root); + let mut options = basic_merge_options(); + options.tree_merge.blob_merge.text.conflict = gix_merge::blob::builtin_driver::text::Conflict::Keep { + style: conflict_style, + marker_size: gix_merge::blob::builtin_driver::text::Conflict::DEFAULT_MARKER_SIZE + .try_into() + .expect("non-zero"), + }; + + let mut actual = gix_merge::commit( + our_commit_id, + their_commit_id, + gix_merge::blob::builtin_driver::text::Labels { + ancestor: None, + current: Some(our_side_name.as_str().into()), + other: Some(their_side_name.as_str().into()), + }, + &mut graph, + &mut diff_resource_cache, + &mut blob_merge, + &odb, + &mut |id| id.to_hex_with_len(7).to_string(), + options, + )? + .tree_merge; + + let actual_id = actual.tree.write(|tree| odb.write(tree))?; + match deviation { + None => { + if actual_id != merge_info.merged_tree { + baseline::show_diff_and_fail(&case_name, actual_id, &actual, &merge_info, &odb); + } + } + Some(Deviation { + message, + expected_tree_id, + }) => { + // Sometimes only the reversed part of a specific test is different. 
+ if case_name != "same-rename-different-mode-A-B" && case_name != "same-rename-different-mode-A-B-diff3" + { + assert_ne!( + actual_id, merge_info.merged_tree, + "{case_name}: Git caught up - adjust expectation {message}" + ); + } else { + assert_eq!( + actual_id, merge_info.merged_tree, + "{case_name}: Git should match here, it just doesn't match in one of two cases" + ); + } + pretty_assertions::assert_str_eq!( + baseline::visualize_tree(&actual_id, &odb, None).to_string(), + baseline::visualize_tree(&expected_tree_id, &odb, None).to_string(), + "{case_name}: tree mismatch: {message} \n{:#?}\n{case_name}", + actual.conflicts + ); + } + } + } + + assert_eq!( + actual_cases, 105, + "BUG: update this number, and don't forget to remove a filter in the end" + ); + + Ok(()) +} + +fn basic_merge_options() -> Options { + gix_merge::commit::Options { + allow_missing_merge_base: true, + use_first_merge_base: false, + tree_merge: gix_merge::tree::Options { + symlink_conflicts: None, + rewrites: Some(Rewrites { + copies: None, + percentage: Some(0.5), + limit: 0, + }), + blob_merge: gix_merge::blob::platform::merge::Options::default(), + blob_merge_command_ctx: Default::default(), + fail_on_conflict: None, + marker_size_multiplier: 0, + allow_lossy_resolution: false, + }, + } +} + +fn new_diff_resource_cache(root: &Path) -> gix_diff::blob::Platform { + gix_diff::blob::Platform::new( + Default::default(), + gix_diff::blob::Pipeline::new(Default::default(), Default::default(), Vec::new(), Default::default()), + Default::default(), + gix_worktree::Stack::new( + root, + gix_worktree::stack::State::AttributesStack(gix_worktree::stack::state::Attributes::default()), + Default::default(), + Vec::new(), + Vec::new(), + ), + ) +} + +fn new_blob_merge_platform( + root: &Path, + large_file_threshold_bytes: impl Into>, +) -> gix_merge::blob::Platform { + let attributes = gix_worktree::Stack::new( + root, + 
gix_worktree::stack::State::AttributesStack(gix_worktree::stack::state::Attributes::new( + Default::default(), + None, + attributes::Source::WorktreeThenIdMapping, + Default::default(), + )), + gix_worktree::glob::pattern::Case::Sensitive, + Vec::new(), + Vec::new(), + ); + let filter = gix_merge::blob::Pipeline::new( + Default::default(), + gix_filter::Pipeline::default(), + gix_merge::blob::pipeline::Options { + large_file_threshold_bytes: large_file_threshold_bytes.into().unwrap_or_default(), + }, + ); + gix_merge::blob::Platform::new( + filter, + gix_merge::blob::pipeline::Mode::ToGit, + attributes, + vec![], + Default::default(), + ) +} + +// TODO: make sure everything is read eventually, even if only to improve debug messages in case of failure. +#[allow(dead_code)] +mod baseline; diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index 0ef56d52954..8c94f9e6bf9 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -33,8 +33,14 @@ pub mod data; /// pub mod find; +/// +pub mod write { + /// The error type returned by the [`Write`](crate::Write) trait. + pub type Error = Box; +} + mod traits; -pub use traits::{Exists, Find, FindExt, FindObjectOrHeader, Header as FindHeader, HeaderExt, WriteTo}; +pub use traits::{Exists, Find, FindExt, FindObjectOrHeader, Header as FindHeader, HeaderExt, Write, WriteTo}; pub mod encode; pub(crate) mod parse; diff --git a/gix-object/src/traits.rs b/gix-object/src/traits.rs deleted file mode 100644 index b773a5cc8bf..00000000000 --- a/gix-object/src/traits.rs +++ /dev/null @@ -1,303 +0,0 @@ -use std::io::Write; - -use crate::Kind; - -/// Writing of objects to a `Write` implementation -pub trait WriteTo { - /// Write a representation of this instance to `out`. - fn write_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()>; - - /// Returns the type of this object. 
- fn kind(&self) -> Kind; - - /// Returns the size of this object's representation (the amount - /// of data which would be written by [`write_to`](Self::write_to)). - /// - /// [`size`](Self::size)'s value has no bearing on the validity of - /// the object, as such it's possible for [`size`](Self::size) to - /// return a sensible value but [`write_to`](Self::write_to) to - /// fail because the object was not actually valid in some way. - fn size(&self) -> u64; - - /// Returns a loose object header based on the object's data - fn loose_header(&self) -> smallvec::SmallVec<[u8; 28]> { - crate::encode::loose_header(self.kind(), self.size()) - } -} - -impl WriteTo for &T -where - T: WriteTo, -{ - fn write_to(&self, out: &mut dyn Write) -> std::io::Result<()> { - ::write_to(self, out) - } - - fn kind(&self) -> Kind { - ::kind(self) - } - - fn size(&self) -> u64 { - ::size(self) - } -} - -mod find { - use crate::find; - - /// Check if an object is present in an object store. - pub trait Exists { - /// Returns `true` if the object exists in the database. - fn exists(&self, id: &gix_hash::oid) -> bool; - } - - /// Find an object in the object store. - /// - /// ## Notes - /// - /// Find effectively needs [generic associated types][issue] to allow a trait for the returned object type. - /// Until then, we will have to make due with explicit types and give them the potentially added features we want. - /// - /// [issue]: https://github.com/rust-lang/rust/issues/44265 - pub trait Find { - /// Find an object matching `id` in the database while placing its raw, possibly encoded data into `buffer`. - /// - /// Returns `Some` object if it was present in the database, or the error that occurred during lookup or object - /// retrieval. - fn try_find<'a>( - &self, - id: &gix_hash::oid, - buffer: &'a mut Vec, - ) -> Result>, find::Error>; - } - - /// Find the header of an object in the object store. 
- pub trait Header { - /// Find the header of the object matching `id` in the database. - /// - /// Returns `Some` header if it was present, or the error that occurred during lookup. - fn try_header(&self, id: &gix_hash::oid) -> Result, find::Error>; - } - - /// A combination of [`Find`] and [`Header`] traits to help with `dyn` trait objects. - pub trait FindObjectOrHeader: Find + Header {} - - mod _impls { - use std::{ops::Deref, rc::Rc, sync::Arc}; - - use gix_hash::oid; - - use crate::Data; - - impl crate::Exists for &T - where - T: crate::Exists, - { - fn exists(&self, id: &oid) -> bool { - (*self).exists(id) - } - } - - impl crate::FindObjectOrHeader for T where T: crate::Find + crate::FindHeader {} - - impl crate::Find for &T - where - T: crate::Find, - { - fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { - (*self).try_find(id, buffer) - } - } - - impl crate::FindHeader for &T - where - T: crate::FindHeader, - { - fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { - (*self).try_header(id) - } - } - - impl crate::Exists for Box - where - T: crate::Exists, - { - fn exists(&self, id: &oid) -> bool { - self.deref().exists(id) - } - } - - impl crate::Exists for Rc - where - T: crate::Exists, - { - fn exists(&self, id: &oid) -> bool { - self.deref().exists(id) - } - } - - impl crate::Find for Rc - where - T: crate::Find, - { - fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { - self.deref().try_find(id, buffer) - } - } - - impl crate::FindHeader for Rc - where - T: crate::FindHeader, - { - fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { - self.deref().try_header(id) - } - } - - impl crate::Find for Box - where - T: crate::Find, - { - fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { - self.deref().try_find(id, buffer) - } - } - - impl crate::FindHeader for Box - where - T: crate::FindHeader, - { - fn 
try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { - self.deref().try_header(id) - } - } - - impl crate::Exists for Arc - where - T: crate::Exists, - { - fn exists(&self, id: &oid) -> bool { - self.deref().exists(id) - } - } - - impl crate::Find for Arc - where - T: crate::Find, - { - fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { - self.deref().try_find(id, buffer) - } - } - - impl crate::FindHeader for Arc - where - T: crate::FindHeader, - { - fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { - self.deref().try_header(id) - } - } - } - - mod ext { - use crate::{ - find, BlobRef, CommitRef, CommitRefIter, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter, - }; - - macro_rules! make_obj_lookup { - ($method:ident, $object_variant:path, $object_kind:path, $object_type:ty) => { - /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error - /// while returning the desired object type. - fn $method<'a>( - &self, - id: &gix_hash::oid, - buffer: &'a mut Vec, - ) -> Result<$object_type, find::existing_object::Error> { - self.try_find(id, buffer) - .map_err(find::existing_object::Error::Find)? - .ok_or_else(|| find::existing_object::Error::NotFound { - oid: id.as_ref().to_owned(), - }) - .and_then(|o| { - o.decode() - .map_err(|err| find::existing_object::Error::Decode { - source: err, - oid: id.as_ref().to_owned(), - }) - }) - .and_then(|o| match o { - $object_variant(o) => return Ok(o), - o => Err(find::existing_object::Error::ObjectKind { - oid: id.as_ref().to_owned(), - actual: o.kind(), - expected: $object_kind, - }), - }) - } - }; - } - - macro_rules! 
make_iter_lookup { - ($method:ident, $object_kind:path, $object_type:ty, $into_iter:tt) => { - /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error - /// while returning the desired iterator type. - fn $method<'a>( - &self, - id: &gix_hash::oid, - buffer: &'a mut Vec, - ) -> Result<$object_type, find::existing_iter::Error> { - self.try_find(id, buffer) - .map_err(find::existing_iter::Error::Find)? - .ok_or_else(|| find::existing_iter::Error::NotFound { - oid: id.as_ref().to_owned(), - }) - .and_then(|o| { - o.$into_iter() - .ok_or_else(|| find::existing_iter::Error::ObjectKind { - oid: id.as_ref().to_owned(), - actual: o.kind, - expected: $object_kind, - }) - }) - } - }; - } - - /// An extension trait with convenience functions. - pub trait HeaderExt: super::Header { - /// Like [`try_header(…)`](super::Header::try_header()), but flattens the `Result>` into a single `Result` making a non-existing header an error. - fn header(&self, id: &gix_hash::oid) -> Result { - self.try_header(id) - .map_err(find::existing::Error::Find)? - .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() }) - } - } - - /// An extension trait with convenience functions. - pub trait FindExt: super::Find { - /// Like [`try_find(…)`](super::Find::try_find()), but flattens the `Result>` into a single `Result` making a non-existing object an error. - fn find<'a>( - &self, - id: &gix_hash::oid, - buffer: &'a mut Vec, - ) -> Result, find::existing::Error> { - self.try_find(id, buffer) - .map_err(find::existing::Error::Find)? 
- .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() }) - } - - make_obj_lookup!(find_commit, ObjectRef::Commit, Kind::Commit, CommitRef<'a>); - make_obj_lookup!(find_tree, ObjectRef::Tree, Kind::Tree, TreeRef<'a>); - make_obj_lookup!(find_tag, ObjectRef::Tag, Kind::Tag, TagRef<'a>); - make_obj_lookup!(find_blob, ObjectRef::Blob, Kind::Blob, BlobRef<'a>); - make_iter_lookup!(find_commit_iter, Kind::Commit, CommitRefIter<'a>, try_into_commit_iter); - make_iter_lookup!(find_tree_iter, Kind::Tree, TreeRefIter<'a>, try_into_tree_iter); - make_iter_lookup!(find_tag_iter, Kind::Tag, TagRefIter<'a>, try_into_tag_iter); - } - - impl FindExt for T {} - } - pub use ext::{FindExt, HeaderExt}; -} -pub use find::*; diff --git a/gix-object/src/traits/_impls.rs b/gix-object/src/traits/_impls.rs new file mode 100644 index 00000000000..5a1347a3430 --- /dev/null +++ b/gix-object/src/traits/_impls.rs @@ -0,0 +1,74 @@ +use crate::{Kind, WriteTo}; +use gix_hash::ObjectId; +use std::io::Read; +use std::ops::Deref; +use std::rc::Rc; +use std::sync::Arc; + +impl crate::Write for &T +where + T: crate::Write, +{ + fn write(&self, object: &dyn WriteTo) -> Result { + (*self).write(object) + } + + fn write_buf(&self, object: Kind, from: &[u8]) -> Result { + (*self).write_buf(object, from) + } + + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { + (*self).write_stream(kind, size, from) + } +} + +impl crate::Write for Arc +where + T: crate::Write, +{ + fn write(&self, object: &dyn WriteTo) -> Result { + self.deref().write(object) + } + + fn write_buf(&self, object: Kind, from: &[u8]) -> Result { + self.deref().write_buf(object, from) + } + + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { + self.deref().write_stream(kind, size, from) + } +} + +impl crate::Write for Rc +where + T: crate::Write, +{ + fn write(&self, object: &dyn WriteTo) -> Result { + self.deref().write(object) + } + + fn write_buf(&self, object: Kind, 
from: &[u8]) -> Result { + self.deref().write_buf(object, from) + } + + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { + self.deref().write_stream(kind, size, from) + } +} + +impl WriteTo for &T +where + T: WriteTo, +{ + fn write_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> { + ::write_to(self, out) + } + + fn kind(&self) -> Kind { + ::kind(self) + } + + fn size(&self) -> u64 { + ::size(self) + } +} diff --git a/gix-object/src/traits/find.rs b/gix-object/src/traits/find.rs new file mode 100644 index 00000000000..8bedbd960a7 --- /dev/null +++ b/gix-object/src/traits/find.rs @@ -0,0 +1,310 @@ +use crate::find; + +/// Check if an object is present in an object store. +pub trait Exists { + /// Returns `true` if the object exists in the database. + fn exists(&self, id: &gix_hash::oid) -> bool; +} + +/// Find an object in the object store. +/// +/// ## Notes +/// +/// Find effectively needs [generic associated types][issue] to allow a trait for the returned object type. +/// Until then, we will have to make due with explicit types and give them the potentially added features we want. +/// +/// [issue]: https://github.com/rust-lang/rust/issues/44265 +pub trait Find { + /// Find an object matching `id` in the database while placing its raw, possibly encoded data into `buffer`. + /// + /// Returns `Some` object if it was present in the database, or the error that occurred during lookup or object + /// retrieval. + fn try_find<'a>(&self, id: &gix_hash::oid, buffer: &'a mut Vec) + -> Result>, find::Error>; +} + +/// Find the header of an object in the object store. +pub trait Header { + /// Find the header of the object matching `id` in the database. + /// + /// Returns `Some` header if it was present, or the error that occurred during lookup. + fn try_header(&self, id: &gix_hash::oid) -> Result, find::Error>; +} + +/// A combination of [`Find`] and [`Header`] traits to help with `dyn` trait objects. 
+pub trait FindObjectOrHeader: Find + Header {} + +mod _impls { + use std::{ops::Deref, rc::Rc, sync::Arc}; + + use gix_hash::oid; + + use crate::Data; + + impl crate::Exists for &T + where + T: crate::Exists, + { + fn exists(&self, id: &oid) -> bool { + (*self).exists(id) + } + } + + impl crate::FindObjectOrHeader for T where T: crate::Find + crate::FindHeader {} + + impl crate::Find for &T + where + T: crate::Find, + { + fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { + (*self).try_find(id, buffer) + } + } + + impl crate::FindHeader for &T + where + T: crate::FindHeader, + { + fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { + (*self).try_header(id) + } + } + + impl crate::Exists for Box + where + T: crate::Exists, + { + fn exists(&self, id: &oid) -> bool { + self.deref().exists(id) + } + } + + impl crate::Exists for Rc + where + T: crate::Exists, + { + fn exists(&self, id: &oid) -> bool { + self.deref().exists(id) + } + } + + impl crate::Find for Rc + where + T: crate::Find, + { + fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { + self.deref().try_find(id, buffer) + } + } + + impl crate::FindHeader for Rc + where + T: crate::FindHeader, + { + fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { + self.deref().try_header(id) + } + } + + impl crate::Find for Box + where + T: crate::Find, + { + fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { + self.deref().try_find(id, buffer) + } + } + + impl crate::FindHeader for Box + where + T: crate::FindHeader, + { + fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { + self.deref().try_header(id) + } + } + + impl crate::Exists for Arc + where + T: crate::Exists, + { + fn exists(&self, id: &oid) -> bool { + self.deref().exists(id) + } + } + + impl crate::Find for Arc + where + T: crate::Find, + { + fn try_find<'a>(&self, id: &oid, buffer: &'a 
mut Vec) -> Result>, crate::find::Error> { + self.deref().try_find(id, buffer) + } + } + + impl crate::FindHeader for Arc + where + T: crate::FindHeader, + { + fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { + self.deref().try_header(id) + } + } +} + +mod ext { + use crate::{find, BlobRef, CommitRef, CommitRefIter, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter}; + + macro_rules! make_obj_lookup { + ($method:ident, $object_variant:path, $object_kind:path, $object_type:ty) => { + /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error + /// while returning the desired object type. + fn $method<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result<$object_type, find::existing_object::Error> { + self.try_find(id, buffer) + .map_err(find::existing_object::Error::Find)? + .ok_or_else(|| find::existing_object::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + .and_then(|o| { + o.decode().map_err(|err| find::existing_object::Error::Decode { + source: err, + oid: id.as_ref().to_owned(), + }) + }) + .and_then(|o| match o { + $object_variant(o) => return Ok(o), + o => Err(find::existing_object::Error::ObjectKind { + oid: id.as_ref().to_owned(), + actual: o.kind(), + expected: $object_kind, + }), + }) + } + }; + } + + macro_rules! make_iter_lookup { + ($method:ident, $object_kind:path, $object_type:ty, $into_iter:tt) => { + /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error + /// while returning the desired iterator type. + fn $method<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result<$object_type, find::existing_iter::Error> { + self.try_find(id, buffer) + .map_err(find::existing_iter::Error::Find)? 
+ .ok_or_else(|| find::existing_iter::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + .and_then(|o| { + o.$into_iter() + .ok_or_else(|| find::existing_iter::Error::ObjectKind { + oid: id.as_ref().to_owned(), + actual: o.kind, + expected: $object_kind, + }) + }) + } + }; + } + + /// An extension trait with convenience functions. + pub trait HeaderExt: super::Header { + /// Like [`try_header(…)`](super::Header::try_header()), but flattens the `Result>` into a single `Result` making a non-existing header an error. + fn header(&self, id: &gix_hash::oid) -> Result { + self.try_header(id) + .map_err(find::existing::Error::Find)? + .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() }) + } + } + + /// An extension trait with convenience functions. + pub trait FindExt: super::Find { + /// Like [`try_find(…)`](super::Find::try_find()), but flattens the `Result>` into a single `Result` making a non-existing object an error. + fn find<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result, find::existing::Error> { + self.try_find(id, buffer) + .map_err(find::existing::Error::Find)? + .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() }) + } + + /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error + /// while returning the desired object type. + fn find_blob<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result, find::existing_object::Error> { + if id == gix_hash::ObjectId::empty_blob(id.kind()) { + return Ok(BlobRef { data: &[] }); + } + self.try_find(id, buffer) + .map_err(find::existing_object::Error::Find)? 
+ .ok_or_else(|| find::existing_object::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + .and_then(|o| { + o.decode().map_err(|err| find::existing_object::Error::Decode { + source: err, + oid: id.as_ref().to_owned(), + }) + }) + .and_then(|o| match o { + ObjectRef::Blob(o) => Ok(o), + o => Err(find::existing_object::Error::ObjectKind { + oid: id.as_ref().to_owned(), + actual: o.kind(), + expected: Kind::Blob, + }), + }) + } + + /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error + /// while returning the desired object type. + fn find_tree<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result, find::existing_object::Error> { + if id == gix_hash::ObjectId::empty_tree(id.kind()) { + return Ok(TreeRef { entries: Vec::new() }); + } + self.try_find(id, buffer) + .map_err(find::existing_object::Error::Find)? + .ok_or_else(|| find::existing_object::Error::NotFound { + oid: id.as_ref().to_owned(), + }) + .and_then(|o| { + o.decode().map_err(|err| find::existing_object::Error::Decode { + source: err, + oid: id.as_ref().to_owned(), + }) + }) + .and_then(|o| match o { + ObjectRef::Tree(o) => Ok(o), + o => Err(find::existing_object::Error::ObjectKind { + oid: id.as_ref().to_owned(), + actual: o.kind(), + expected: Kind::Tree, + }), + }) + } + + make_obj_lookup!(find_commit, ObjectRef::Commit, Kind::Commit, CommitRef<'a>); + make_obj_lookup!(find_tag, ObjectRef::Tag, Kind::Tag, TagRef<'a>); + make_iter_lookup!(find_commit_iter, Kind::Commit, CommitRefIter<'a>, try_into_commit_iter); + make_iter_lookup!(find_tree_iter, Kind::Tree, TreeRefIter<'a>, try_into_tree_iter); + make_iter_lookup!(find_tag_iter, Kind::Tag, TagRefIter<'a>, try_into_tag_iter); + } + + impl FindExt for T {} +} +pub use ext::{FindExt, HeaderExt}; diff --git a/gix-object/src/traits/mod.rs b/gix-object/src/traits/mod.rs new file mode 100644 index 00000000000..f0078fd9345 --- /dev/null +++ 
b/gix-object/src/traits/mod.rs @@ -0,0 +1,54 @@ +use std::io; + +use crate::Kind; + +/// Describe the capability to write git objects into an object store. +pub trait Write { + /// Write objects using the intrinsic kind of [`hash`](gix_hash::Kind) into the database, + /// returning id to reference it in subsequent reads. + fn write(&self, object: &dyn WriteTo) -> Result { + let mut buf = Vec::with_capacity(2048); + object.write_to(&mut buf)?; + self.write_stream(object.kind(), buf.len() as u64, &mut buf.as_slice()) + } + /// As [`write`](Write::write), but takes an [`object` kind](Kind) along with its encoded bytes. + fn write_buf(&self, object: crate::Kind, mut from: &[u8]) -> Result { + self.write_stream(object, from.len() as u64, &mut from) + } + /// As [`write`](Write::write), but takes an input stream. + /// This is commonly used for writing blobs directly without reading them to memory first. + fn write_stream( + &self, + kind: crate::Kind, + size: u64, + from: &mut dyn io::Read, + ) -> Result; +} + +/// Writing of objects to a `Write` implementation +pub trait WriteTo { + /// Write a representation of this instance to `out`. + fn write_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()>; + + /// Returns the type of this object. + fn kind(&self) -> Kind; + + /// Returns the size of this object's representation (the amount + /// of data which would be written by [`write_to`](Self::write_to)). + /// + /// [`size`](Self::size)'s value has no bearing on the validity of + /// the object, as such it's possible for [`size`](Self::size) to + /// return a sensible value but [`write_to`](Self::write_to) to + /// fail because the object was not actually valid in some way. 
+ fn size(&self) -> u64; + + /// Returns a loose object header based on the object's data + fn loose_header(&self) -> smallvec::SmallVec<[u8; 28]> { + crate::encode::loose_header(self.kind(), self.size()) + } +} + +mod _impls; + +mod find; +pub use find::*; diff --git a/gix-object/src/tree/editor.rs b/gix-object/src/tree/editor.rs index 4edf723b434..0b86fa090a1 100644 --- a/gix-object/src/tree/editor.rs +++ b/gix-object/src/tree/editor.rs @@ -46,7 +46,7 @@ impl<'a> Editor<'a> { find, object_hash, trees: HashMap::from_iter(Some((empty_path(), root))), - path_buf: Vec::with_capacity(256).into(), + path_buf: BString::from(Vec::with_capacity(256)).into(), tree_buf: Vec::with_capacity(512), } } @@ -54,7 +54,8 @@ impl<'a> Editor<'a> { /// Operations impl Editor<'_> { - /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`. + /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`, and remove + /// written portions from our state except for the root tree, which affects [`get()`](Editor::get()). /// Note that the returned object id *can* be the empty tree if everything was removed or if nothing /// was added to the tree. /// @@ -72,7 +73,7 @@ impl Editor<'_> { /// It is absolutely and intentionally possible to write out invalid trees with this method. /// Higher layers are expected to perform detailed validation. pub fn write(&mut self, out: impl FnMut(&Tree) -> Result) -> Result { - self.path_buf.clear(); + self.path_buf.borrow_mut().clear(); self.write_at_pathbuf(out, WriteMode::Normal) } @@ -85,10 +86,24 @@ impl Editor<'_> { I: IntoIterator, C: AsRef, { - self.path_buf.clear(); + self.path_buf.borrow_mut().clear(); self.upsert_or_remove_at_pathbuf(rela_path, None) } + /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written + /// to that point. 
+ /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed. + /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly + /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory. + pub fn get(&self, rela_path: I) -> Option<&tree::Entry> + where + I: IntoIterator, + C: AsRef, + { + self.path_buf.borrow_mut().clear(); + self.get_inner(rela_path) + } + /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree, /// like `a/b/c`. Names are matched case-sensitively. /// @@ -108,10 +123,41 @@ impl Editor<'_> { I: IntoIterator, C: AsRef, { - self.path_buf.clear(); + self.path_buf.borrow_mut().clear(); self.upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal))) } + fn get_inner(&self, rela_path: I) -> Option<&tree::Entry> + where + I: IntoIterator, + C: AsRef, + { + let mut path_buf = self.path_buf.borrow_mut(); + let mut cursor = self.trees.get(path_buf.as_bstr()).expect("root is always present"); + let mut rela_path = rela_path.into_iter().peekable(); + while let Some(name) = rela_path.next() { + let name = name.as_ref(); + let is_last = rela_path.peek().is_none(); + match cursor + .entries + .binary_search_by(|e| cmp_entry_with_name(e, name, true)) + .or_else(|_| cursor.entries.binary_search_by(|e| cmp_entry_with_name(e, name, false))) + { + Ok(idx) if is_last => return Some(&cursor.entries[idx]), + Ok(idx) => { + if cursor.entries[idx].mode.is_tree() { + push_path_component(&mut path_buf, name); + cursor = self.trees.get(path_buf.as_bstr())?; + } else { + break; + } + } + Err(_) => break, + }; + } + None + } + fn write_at_pathbuf( &mut self, mut out: impl FnMut(&Tree) -> Result, @@ -120,11 +166,12 @@ impl Editor<'_> { assert_ne!(self.trees.len(), 0, "there is at least the root tree"); // back is for children, front is for parents. 
+ let path_buf = self.path_buf.borrow_mut(); let mut parents = vec![( None::, - self.path_buf.clone(), + path_buf.clone(), self.trees - .remove(&path_hash(&self.path_buf)) + .remove(path_buf.as_bstr()) .expect("root tree is always present"), )]; let mut children = Vec::new(); @@ -133,7 +180,7 @@ impl Editor<'_> { for entry in &tree.entries { if entry.mode.is_tree() { let prev_len = push_path_component(&mut rela_path, &entry.filename); - if let Some(sub_tree) = self.trees.remove(&path_hash(&rela_path)) { + if let Some(sub_tree) = self.trees.remove(&rela_path) { all_entries_unchanged_or_written = false; let next_parent_idx = parents.len(); children.push((Some(next_parent_idx), rela_path.clone(), sub_tree)); @@ -167,7 +214,7 @@ impl Editor<'_> { } } else if parents.is_empty() { debug_assert!(children.is_empty(), "we consume children before parents"); - debug_assert_eq!(rela_path, self.path_buf, "this should always be the root tree"); + debug_assert_eq!(rela_path, **path_buf, "this should always be the root tree"); // There may be left-over trees if they are replaced with blobs for example. 
match out(&tree) { @@ -207,10 +254,8 @@ impl Editor<'_> { I: IntoIterator, C: AsRef, { - let mut cursor = self - .trees - .get_mut(&path_hash(&self.path_buf)) - .expect("root is always present"); + let mut path_buf = self.path_buf.borrow_mut(); + let mut cursor = self.trees.get_mut(path_buf.as_bstr()).expect("root is always present"); let mut rela_path = rela_path.into_iter().peekable(); let new_kind_is_tree = kind_and_id.map_or(false, |(kind, _, _)| kind == EntryKind::Tree); while let Some(name) = rela_path.next() { @@ -294,9 +339,8 @@ impl Editor<'_> { if is_last && kind_and_id.map_or(false, |(_, _, mode)| mode == UpsertMode::Normal) { break; } - push_path_component(&mut self.path_buf, name); - let path_id = path_hash(&self.path_buf); - cursor = match self.trees.entry(path_id) { + push_path_component(&mut path_buf, name); + cursor = match self.trees.entry(path_buf.clone()) { hash_map::Entry::Occupied(e) => e.into_mut(), hash_map::Entry::Vacant(e) => e.insert( if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) { @@ -307,6 +351,7 @@ impl Editor<'_> { ), }; } + drop(path_buf); Ok(self) } @@ -325,7 +370,7 @@ impl Editor<'_> { mod cursor { use crate::tree::editor::{Cursor, UpsertMode, WriteMode}; use crate::tree::{Editor, EntryKind}; - use crate::Tree; + use crate::{tree, Tree}; use bstr::{BStr, BString}; use gix_hash::ObjectId; @@ -350,26 +395,41 @@ mod cursor { I: IntoIterator, C: AsRef, { - self.path_buf.clear(); + self.path_buf.borrow_mut().clear(); self.upsert_or_remove_at_pathbuf( rela_path, Some((EntryKind::Tree, self.object_hash.null(), UpsertMode::AssureTreeOnly)), )?; + let prefix = self.path_buf.borrow_mut().clone(); Ok(Cursor { - prefix: self.path_buf.clone(), /* set during the upsert call */ + prefix, /* set during the upsert call */ parent: self, }) } } impl Cursor<'_, '_> { + /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written + /// to that point. 
+ /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed. + /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly + /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory. + pub fn get(&self, rela_path: I) -> Option<&tree::Entry> + where + I: IntoIterator, + C: AsRef, + { + self.parent.path_buf.borrow_mut().clone_from(&self.prefix); + self.parent.get_inner(rela_path) + } + /// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree. pub fn upsert(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, super::Error> where I: IntoIterator, C: AsRef, { - self.parent.path_buf.clone_from(&self.prefix); + self.parent.path_buf.borrow_mut().clone_from(&self.prefix); self.parent .upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))?; Ok(self) @@ -381,14 +441,14 @@ mod cursor { I: IntoIterator, C: AsRef, { - self.parent.path_buf.clone_from(&self.prefix); + self.parent.path_buf.borrow_mut().clone_from(&self.prefix); self.parent.upsert_or_remove_at_pathbuf(rela_path, None)?; Ok(self) } /// Like [`Editor::write()`], but will write only the subtree of the cursor. 
pub fn write(&mut self, out: impl FnMut(&Tree) -> Result) -> Result { - self.parent.path_buf.clone_from(&self.prefix); + self.parent.path_buf.borrow_mut().clone_from(&self.prefix); self.parent.write_at_pathbuf(out, WriteMode::FromCursor) } } @@ -424,10 +484,6 @@ fn empty_path() -> BString { BString::default() } -fn path_hash(path: &[u8]) -> BString { - path.to_vec().into() -} - fn push_path_component(base: &mut BString, component: &[u8]) -> usize { let prev_len = base.len(); debug_assert!(base.last() != Some(&b'/')); diff --git a/gix-object/src/tree/mod.rs b/gix-object/src/tree/mod.rs index 688af26dd57..1fcb1004a2d 100644 --- a/gix-object/src/tree/mod.rs +++ b/gix-object/src/tree/mod.rs @@ -2,6 +2,7 @@ use crate::{ bstr::{BStr, BString}, tree, Tree, TreeRef, }; +use std::cell::RefCell; use std::cmp::Ordering; /// @@ -30,7 +31,7 @@ pub struct Editor<'a> { /// dropped when writing at the latest. trees: std::collections::HashMap, /// A buffer to build up paths when finding the tree to edit. - path_buf: BString, + path_buf: RefCell, /// Our buffer for storing tree-data in, right before decoding it. 
tree_buf: Vec, } diff --git a/gix-object/tests/object/tree/editor.rs b/gix-object/tests/object/tree/editor.rs index f74b5c15602..2565a319cb0 100644 --- a/gix-object/tests/object/tree/editor.rs +++ b/gix-object/tests/object/tree/editor.rs @@ -1,4 +1,4 @@ -use gix_object::tree::EntryKind; +use gix_object::tree::{Entry, EntryKind}; use gix_object::Tree; #[test] @@ -32,6 +32,19 @@ fn from_empty_cursor() -> crate::Result { "only one item is left in the tree, which also keeps it alive" ); assert_eq!(num_writes_and_clear(), 1, "root tree"); + assert_eq!( + cursor.get(None::<&str>), + None, + "the 'root' can't be obtained, no entry exists for it, ever" + ); + assert_eq!( + cursor.get(Some("empty-dir-via-cursor")), + Some(&Entry { + mode: EntryKind::Tree.into(), + filename: "empty-dir-via-cursor".into(), + oid: gix_hash::ObjectId::empty_tree(gix_hash::Kind::Sha1), + }), + ); let actual = edit.write(&mut write)?; assert_eq!( @@ -52,6 +65,20 @@ fn from_empty_cursor() -> crate::Result { let mut cursor = edit.cursor_at(cursor_path)?; let actual = cursor.remove(Some("empty-dir-via-cursor"))?.write(&mut write)?; assert_eq!(actual, empty_tree(), "it keeps the empty tree like the editor would"); + assert_eq!( + edit.get(["some", "deeply", "nested", "path"]), + Some(&Entry { + mode: EntryKind::Tree.into(), + filename: "path".into(), + oid: gix_hash::Kind::Sha1.null(), + }), + "the directory leading to the removed one is still present" + ); + assert_eq!( + edit.get(["some", "deeply", "nested", "path", "empty-dir-via-cursor"]), + None, + "but the removed entry is indee removed" + ); let actual = edit.write(&mut write)?; assert_eq!( @@ -400,6 +427,11 @@ fn from_empty_add() -> crate::Result { "4b825dc642cb6eb9a060e54bf8d69288fbee4904\n" ); assert_eq!(odb.access_count_and_clear(), 0); + assert_eq!( + edit.get(None::<&str>), + None, + "the 'root' can't be obtained, no entry exists for it, ever" + ); let actual = edit .upsert(Some("hi"), EntryKind::Blob, gix_hash::Kind::Sha1.null())? 
@@ -431,12 +463,28 @@ fn from_empty_add() -> crate::Result { assert_eq!(storage.borrow().len(), 1, "still nothing but empty trees"); assert_eq!(odb.access_count_and_clear(), 0); - let actual = edit - .upsert(["a", "b"], EntryKind::Tree, empty_tree())? + edit.upsert(["a", "b"], EntryKind::Tree, empty_tree())? .upsert(["a", "b", "c"], EntryKind::Tree, empty_tree())? - .upsert(["a", "b", "d", "e"], EntryKind::Tree, empty_tree())? - .write(&mut write) - .expect("it's OK to write empty trees"); + .upsert(["a", "b", "d", "e"], EntryKind::Tree, empty_tree())?; + assert_eq!( + edit.get(["a", "b"]), + Some(&Entry { + mode: EntryKind::Tree.into(), + filename: "b".into(), + oid: hex_to_id("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), + }), + "before writing, entries are still present, just like they were written" + ); + assert_eq!( + edit.get(["a", "b", "c"]), + Some(&Entry { + mode: EntryKind::Tree.into(), + filename: "c".into(), + oid: gix_hash::ObjectId::empty_tree(gix_hash::Kind::Sha1), + }), + ); + + let actual = edit.write(&mut write).expect("it's OK to write empty trees"); assert_eq!( display_tree(actual, &storage), "bf91a94ae659ac8a9da70d26acf42df1a36adb6e @@ -450,6 +498,16 @@ fn from_empty_add() -> crate::Result { ); assert_eq!(num_writes_and_clear(), 4, "it wrote the trees it needed to write"); assert_eq!(odb.access_count_and_clear(), 0); + assert_eq!(edit.get(["a", "b"]), None, "nothing exists here"); + assert_eq!( + edit.get(Some("a")), + Some(&Entry { + mode: EntryKind::Tree.into(), + filename: "a".into(), + oid: hex_to_id("850bf83c26003cb0541318718bc9217c4a5bde6d"), + }), + "but the top-level tree is still available and can yield its entries, as written with proper ids" + ); let actual = edit .upsert(["a"], EntryKind::Blob, any_blob())? 
@@ -532,6 +590,24 @@ fn from_empty_add() -> crate::Result { "still, only the root-tree changes effectively" ); assert_eq!(odb.access_count_and_clear(), 0); + + let actual = edit + .upsert(["a", "b"], EntryKind::Tree, empty_tree())? + .upsert(["a", "b", "c"], EntryKind::BlobExecutable, any_blob())? + // .upsert(["a", "b", "d"], EntryKind::Blob, any_blob())? + .write(&mut write)?; + assert_eq!( + display_tree(actual, &storage), + "d8d3f558776965f70452625b72363234f517b290 +└── a + └── b + └── c bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.100755 +", + "the intermediate tree is rewritten to be suitable to hold the blob" + ); + assert_eq!(num_writes_and_clear(), 3, "root, and two child-trees"); + assert_eq!(odb.access_count_and_clear(), 0); + Ok(()) } diff --git a/gix-odb/src/cache.rs b/gix-odb/src/cache.rs index cc737041e63..77022a09661 100644 --- a/gix-odb/src/cache.rs +++ b/gix-odb/src/cache.rs @@ -141,11 +141,16 @@ mod impls { use crate::{find::Header, pack::data::entry::Location, Cache}; - impl crate::Write for Cache + impl gix_object::Write for Cache where - S: crate::Write, + S: gix_object::Write, { - fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { + fn write_stream( + &self, + kind: Kind, + size: u64, + from: &mut dyn Read, + ) -> Result { self.inner.write_stream(kind, size, from) } } diff --git a/gix-odb/src/lib.rs b/gix-odb/src/lib.rs index aaedc74a175..ca91fa7a185 100644 --- a/gix-odb/src/lib.rs +++ b/gix-odb/src/lib.rs @@ -77,13 +77,7 @@ pub mod find; /// An object database equivalent to `/dev/null`, dropping all objects stored into it. mod traits; -pub use traits::{Header, HeaderExt, Write}; - -/// -pub mod write { - /// The error type returned by the [`Write`](crate::Write) trait. - pub type Error = Box; -} +pub use traits::{Header, HeaderExt}; /// A thread-local handle to access any object. 
pub type Handle = Cache>>; diff --git a/gix-odb/src/memory.rs b/gix-odb/src/memory.rs index 5fdbd2abe3e..cd8e9fbdc3d 100644 --- a/gix-odb/src/memory.rs +++ b/gix-odb/src/memory.rs @@ -202,16 +202,16 @@ where } } -impl crate::Write for Proxy +impl gix_object::Write for Proxy where - T: crate::Write, + T: gix_object::Write, { fn write_stream( &self, kind: gix_object::Kind, size: u64, from: &mut dyn std::io::Read, - ) -> Result { + ) -> Result { let Some(map) = self.memory.as_ref() else { return self.inner.write_stream(kind, size, from); }; diff --git a/gix-odb/src/sink.rs b/gix-odb/src/sink.rs index 7784901a8c9..46077ff37cf 100644 --- a/gix-odb/src/sink.rs +++ b/gix-odb/src/sink.rs @@ -19,13 +19,13 @@ impl Sink { } } -impl crate::traits::Write for Sink { +impl gix_object::Write for Sink { fn write_stream( &self, kind: gix_object::Kind, mut size: u64, from: &mut dyn io::Read, - ) -> Result { + ) -> Result { let mut buf = [0u8; u16::MAX as usize]; let header = gix_object::encode::loose_header(kind, size); diff --git a/gix-odb/src/store_impls/dynamic/write.rs b/gix-odb/src/store_impls/dynamic/write.rs index ba615f351f4..d0b8735121f 100644 --- a/gix-odb/src/store_impls/dynamic/write.rs +++ b/gix-odb/src/store_impls/dynamic/write.rs @@ -8,7 +8,7 @@ use crate::store; mod error { use crate::{loose, store}; - /// The error returned by the [dynamic Store's][crate::Store] [`Write`][crate::Write] implementation. + /// The error returned by the [dynamic Store's][crate::Store] [`Write`](gix_object::Write) implementation. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -24,11 +24,11 @@ pub use error::Error; use crate::store_impls::dynamic; -impl crate::Write for store::Handle +impl gix_object::Write for store::Handle where S: Deref + Clone, { - fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { let mut snapshot = self.snapshot.borrow_mut(); Ok(match snapshot.loose_dbs.first() { Some(ldb) => ldb.write_stream(kind, size, from)?, diff --git a/gix-odb/src/store_impls/loose/verify.rs b/gix-odb/src/store_impls/loose/verify.rs index ae83c1d01cb..dd08e9cd9f2 100644 --- a/gix-odb/src/store_impls/loose/verify.rs +++ b/gix-odb/src/store_impls/loose/verify.rs @@ -5,7 +5,7 @@ use std::{ use gix_features::progress::{Count, DynNestedProgress, Progress}; -use crate::{loose::Store, Write}; +use crate::loose::Store; /// pub mod integrity { @@ -64,6 +64,7 @@ impl Store { progress: &mut dyn DynNestedProgress, should_interrupt: &AtomicBool, ) -> Result { + use gix_object::Write; let mut buf = Vec::new(); let sink = crate::sink(self.object_hash); diff --git a/gix-odb/src/store_impls/loose/write.rs b/gix-odb/src/store_impls/loose/write.rs index a70351a1574..bdef0b90005 100644 --- a/gix-odb/src/store_impls/loose/write.rs +++ b/gix-odb/src/store_impls/loose/write.rs @@ -7,7 +7,7 @@ use tempfile::NamedTempFile; use super::Store; use crate::store_impls::loose; -/// Returned by the [`crate::Write`] trait implementation of [`Store`] +/// Returned by the [`gix_object::Write`] trait implementation of [`Store`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { @@ -26,8 +26,8 @@ pub enum Error { }, } -impl crate::traits::Write for Store { - fn write(&self, object: &dyn WriteTo) -> Result { +impl gix_object::Write for Store { + fn write(&self, object: &dyn WriteTo) -> Result { let mut to = self.dest()?; to.write_all(&object.loose_header()).map_err(|err| 
Error::Io { source: err, @@ -46,7 +46,7 @@ impl crate::traits::Write for Store { /// Write the given buffer in `from` to disk in one syscall at best. /// /// This will cost at least 4 IO operations. - fn write_buf(&self, kind: gix_object::Kind, from: &[u8]) -> Result { + fn write_buf(&self, kind: gix_object::Kind, from: &[u8]) -> Result { let mut to = self.dest().map_err(Box::new)?; to.write_all(&gix_object::encode::loose_header(kind, from.len() as u64)) .map_err(|err| Error::Io { @@ -72,7 +72,7 @@ impl crate::traits::Write for Store { kind: gix_object::Kind, size: u64, mut from: &mut dyn io::Read, - ) -> Result { + ) -> Result { let mut to = self.dest().map_err(Box::new)?; to.write_all(&gix_object::encode::loose_header(kind, size)) .map_err(|err| Error::Io { diff --git a/gix-odb/src/traits.rs b/gix-odb/src/traits.rs index df6bd9b0c12..f3e5cec6403 100644 --- a/gix-odb/src/traits.rs +++ b/gix-odb/src/traits.rs @@ -1,32 +1,5 @@ -use std::io; - -use gix_object::WriteTo; - use crate::find; -/// Describe the capability to write git objects into an object store. -pub trait Write { - /// Write objects using the intrinsic kind of [`hash`][gix_hash::Kind] into the database, - /// returning id to reference it in subsequent reads. - fn write(&self, object: &dyn WriteTo) -> Result { - let mut buf = Vec::with_capacity(2048); - object.write_to(&mut buf)?; - self.write_stream(object.kind(), buf.len() as u64, &mut buf.as_slice()) - } - /// As [`write`][Write::write], but takes an [`object` kind][gix_object::Kind] along with its encoded bytes. - fn write_buf(&self, object: gix_object::Kind, mut from: &[u8]) -> Result { - self.write_stream(object, from.len() as u64, &mut from) - } - /// As [`write`][Write::write], but takes an input stream. - /// This is commonly used for writing blobs directly without reading them to memory first. 
- fn write_stream( - &self, - kind: gix_object::Kind, - size: u64, - from: &mut dyn io::Read, - ) -> Result; -} - /// A way to obtain object properties without fully decoding it. pub trait Header { /// Try to read the header of the object associated with `id` or return `None` if it could not be found. @@ -34,64 +7,12 @@ pub trait Header { } mod _impls { - use std::{io::Read, ops::Deref, rc::Rc, sync::Arc}; + use std::{ops::Deref, rc::Rc, sync::Arc}; - use gix_hash::{oid, ObjectId}; - use gix_object::{Kind, WriteTo}; + use gix_hash::oid; use crate::find::Header; - impl crate::Write for &T - where - T: crate::Write, - { - fn write(&self, object: &dyn WriteTo) -> Result { - (*self).write(object) - } - - fn write_buf(&self, object: Kind, from: &[u8]) -> Result { - (*self).write_buf(object, from) - } - - fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { - (*self).write_stream(kind, size, from) - } - } - - impl crate::Write for Arc - where - T: crate::Write, - { - fn write(&self, object: &dyn WriteTo) -> Result { - self.deref().write(object) - } - - fn write_buf(&self, object: Kind, from: &[u8]) -> Result { - self.deref().write_buf(object, from) - } - - fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { - self.deref().write_stream(kind, size, from) - } - } - - impl crate::Write for Rc - where - T: crate::Write, - { - fn write(&self, object: &dyn WriteTo) -> Result { - self.deref().write(object) - } - - fn write_buf(&self, object: Kind, from: &[u8]) -> Result { - self.deref().write_buf(object, from) - } - - fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { - self.deref().write_stream(kind, size, from) - } - } - impl crate::Header for &T where T: crate::Header, diff --git a/gix-odb/tests/odb/memory.rs b/gix-odb/tests/odb/memory.rs index d0d8ddb2bfd..f9c34a4c9f7 100644 --- a/gix-odb/tests/odb/memory.rs +++ b/gix-odb/tests/odb/memory.rs @@ -1,6 +1,6 @@ use crate::odb::hex_to_id; +use 
gix_object::Write; use gix_object::{tree, Exists, FindExt}; -use gix_odb::Write; use gix_testtools::tempfile::TempDir; #[test] diff --git a/gix-odb/tests/odb/sink/mod.rs b/gix-odb/tests/odb/sink/mod.rs index a6859a10b78..44abcae5f80 100644 --- a/gix-odb/tests/odb/sink/mod.rs +++ b/gix-odb/tests/odb/sink/mod.rs @@ -1,4 +1,4 @@ -use gix_odb::Write; +use gix_object::Write; use crate::store::loose::{locate_oid, object_ids}; diff --git a/gix-odb/tests/odb/store/dynamic.rs b/gix-odb/tests/odb/store/dynamic.rs index 87caddd9682..e0241f2ee4b 100644 --- a/gix-odb/tests/odb/store/dynamic.rs +++ b/gix-odb/tests/odb/store/dynamic.rs @@ -1,8 +1,8 @@ use std::process::Command; use gix_hash::ObjectId; -use gix_object::{Exists, FindExt}; -use gix_odb::{store, store::iter::Ordering, Header, Write}; +use gix_object::{Exists, FindExt, Write}; +use gix_odb::{store, store::iter::Ordering, Header}; use gix_testtools::fixture_path_standalone; use crate::{hex_to_id, odb::db}; diff --git a/gix-odb/tests/odb/store/loose.rs b/gix-odb/tests/odb/store/loose.rs index 7848e5b5b5d..e5b4fee35d6 100644 --- a/gix-odb/tests/odb/store/loose.rs +++ b/gix-odb/tests/odb/store/loose.rs @@ -45,7 +45,8 @@ fn verify_integrity() { } mod write { - use gix_odb::{loose, Write}; + use gix_object::Write; + use gix_odb::loose; use crate::store::loose::{locate_oid, object_ids}; diff --git a/gix-ref/src/name.rs b/gix-ref/src/name.rs index 27522758af9..4da2a9db9d8 100644 --- a/gix-ref/src/name.rs +++ b/gix-ref/src/name.rs @@ -62,16 +62,21 @@ impl PartialNameRef { } impl PartialNameRef { - pub(crate) fn looks_like_full_name(&self) -> bool { + pub(crate) fn looks_like_full_name(&self, consider_pseudo_ref: bool) -> bool { let name = self.0.as_bstr(); name.starts_with_str("refs/") || name.starts_with(Category::MainPseudoRef.prefix()) || name.starts_with(Category::LinkedPseudoRef { name: "".into() }.prefix()) - || is_pseudo_ref(name) + || (consider_pseudo_ref && is_pseudo_ref(name)) } - pub(crate) fn 
construct_full_name_ref<'buf>(&self, inbetween: &str, buf: &'buf mut BString) -> &'buf FullNameRef { + pub(crate) fn construct_full_name_ref<'buf>( + &self, + inbetween: &str, + buf: &'buf mut BString, + consider_pseudo_ref: bool, + ) -> &'buf FullNameRef { buf.clear(); - if !self.looks_like_full_name() { + if !self.looks_like_full_name(consider_pseudo_ref) { buf.push_str("refs/"); } if !inbetween.is_empty() { diff --git a/gix-ref/src/store/file/find.rs b/gix-ref/src/store/file/find.rs index 9842de1d852..439c356b465 100644 --- a/gix-ref/src/store/file/find.rs +++ b/gix-ref/src/store/file/find.rs @@ -6,6 +6,7 @@ use std::{ pub use error::Error; +use crate::name::is_pseudo_ref; use crate::{ file, store_impl::{file::loose, packed}, @@ -103,13 +104,28 @@ impl file::Store { let precomposed_partial_name = precomposed_partial_name_storage .as_ref() .map(std::convert::AsRef::as_ref); - for inbetween in &["", "tags", "heads", "remotes"] { - match self.find_inner(inbetween, partial_name, precomposed_partial_name, packed, &mut buf) { - Ok(Some(r)) => return Ok(Some(decompose_if(r, precomposed_partial_name.is_some()))), - Ok(None) => { - continue; + for consider_pseudo_ref in [true, false] { + if !consider_pseudo_ref && !is_pseudo_ref(partial_name.as_bstr()) { + break; + } + 'try_directories: for inbetween in &["", "tags", "heads", "remotes"] { + match self.find_inner( + inbetween, + partial_name, + precomposed_partial_name, + packed, + &mut buf, + consider_pseudo_ref, + ) { + Ok(Some(r)) => return Ok(Some(decompose_if(r, precomposed_partial_name.is_some()))), + Ok(None) => { + if consider_pseudo_ref && is_pseudo_ref(partial_name.as_bstr()) { + break 'try_directories; + } + continue; + } + Err(err) => return Err(err), } - Err(err) => return Err(err), } } if partial_name.as_bstr() != "HEAD" { @@ -129,6 +145,7 @@ impl file::Store { .map(std::convert::AsRef::as_ref), None, &mut buf, + true, /* consider-pseudo-ref */ ) .map(|res| res.map(|r| decompose_if(r, 
precomposed_partial_name_storage.is_some()))) } else { @@ -143,10 +160,11 @@ impl file::Store { precomposed_partial_name: Option<&PartialNameRef>, packed: Option<&packed::Buffer>, path_buf: &mut BString, + consider_pseudo_ref: bool, ) -> Result, Error> { let full_name = precomposed_partial_name .unwrap_or(partial_name) - .construct_full_name_ref(inbetween, path_buf); + .construct_full_name_ref(inbetween, path_buf, consider_pseudo_ref); let content_buf = self.ref_contents(full_name).map_err(|err| Error::ReadFileContents { source: err, path: self.reference_path(full_name), diff --git a/gix-ref/src/store/packed/find.rs b/gix-ref/src/store/packed/find.rs index 59988c4069b..a172ab61032 100644 --- a/gix-ref/src/store/packed/find.rs +++ b/gix-ref/src/store/packed/find.rs @@ -17,7 +17,7 @@ impl packed::Buffer { let name = name.try_into()?; let mut buf = BString::default(); for inbetween in &["", "tags", "heads", "remotes"] { - let (name, was_absolute) = if name.looks_like_full_name() { + let (name, was_absolute) = if name.looks_like_full_name(false) { let name = FullNameRef::new_unchecked(name.as_bstr()); let name = match transform_full_name_for_lookup(name) { None => return Ok(None), @@ -25,7 +25,7 @@ impl packed::Buffer { }; (name, true) } else { - let full_name = name.construct_full_name_ref(inbetween, &mut buf); + let full_name = name.construct_full_name_ref(inbetween, &mut buf, false); (full_name, false) }; match self.try_find_full_name(name)? { diff --git a/gix-ref/tests/Cargo.toml b/gix-ref/tests/Cargo.toml index 31864c6cbeb..1f6ec8940e7 100644 --- a/gix-ref/tests/Cargo.toml +++ b/gix-ref/tests/Cargo.toml @@ -17,7 +17,7 @@ serde = ["gix-ref/serde"] [[test]] name = "refs" -path = "refs.rs" +path = "refs/main.rs" [dev-dependencies] gix-ref = { path = ".." 
} diff --git a/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository.tar b/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository.tar index 9c2bc2ea2b9..87d999da6ba 100644 Binary files a/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository.tar and b/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_for_overlay.tar b/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_for_overlay.tar index e46ce16be89..b6b126990a7 100644 Binary files a/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_for_overlay.tar and b/gix-ref/tests/fixtures/generated-archives/make_packed_ref_repository_for_overlay.tar differ diff --git a/gix-ref/tests/fixtures/generated-archives/make_ref_repository.tar b/gix-ref/tests/fixtures/generated-archives/make_ref_repository.tar index ae58c4f1ab5..d239944d93b 100644 Binary files a/gix-ref/tests/fixtures/generated-archives/make_ref_repository.tar and b/gix-ref/tests/fixtures/generated-archives/make_ref_repository.tar differ diff --git a/gix-ref/tests/fixtures/make_packed_ref_repository.sh b/gix-ref/tests/fixtures/make_packed_ref_repository.sh index daea997523b..3a81139d5aa 100755 --- a/gix-ref/tests/fixtures/make_packed_ref_repository.sh +++ b/gix-ref/tests/fixtures/make_packed_ref_repository.sh @@ -7,6 +7,7 @@ git checkout -q -b main git commit -q --allow-empty -m c1 git branch dt1 git branch d1 +git branch A mkdir -p .git/refs/remotes/origin diff --git a/gix-ref/tests/fixtures/make_packed_ref_repository_for_overlay.sh b/gix-ref/tests/fixtures/make_packed_ref_repository_for_overlay.sh index 9410375c73d..03574b865ae 100755 --- a/gix-ref/tests/fixtures/make_packed_ref_repository_for_overlay.sh +++ b/gix-ref/tests/fixtures/make_packed_ref_repository_for_overlay.sh @@ -6,6 +6,7 @@ git init -q git checkout -q -b main git commit -q --allow-empty -m c1 git branch 
newer-as-loose +git branch A git tag -m "tag object" tag-object mkdir -p .git/refs/remotes/origin diff --git a/gix-ref/tests/fixtures/make_ref_repository.sh b/gix-ref/tests/fixtures/make_ref_repository.sh index 7288ffe59f0..38080cddfca 100755 --- a/gix-ref/tests/fixtures/make_ref_repository.sh +++ b/gix-ref/tests/fixtures/make_ref_repository.sh @@ -7,6 +7,7 @@ git checkout -q -b main git commit -q --allow-empty -m c1 git branch dt1 git branch d1 +git branch A mkdir -p .git/refs/remotes/origin diff --git a/gix-ref/tests/file/log.rs b/gix-ref/tests/refs/file/log.rs similarity index 100% rename from gix-ref/tests/file/log.rs rename to gix-ref/tests/refs/file/log.rs diff --git a/gix-ref/tests/file/mod.rs b/gix-ref/tests/refs/file/mod.rs similarity index 100% rename from gix-ref/tests/file/mod.rs rename to gix-ref/tests/refs/file/mod.rs diff --git a/gix-ref/tests/file/reference.rs b/gix-ref/tests/refs/file/reference.rs similarity index 100% rename from gix-ref/tests/file/reference.rs rename to gix-ref/tests/refs/file/reference.rs diff --git a/gix-ref/tests/file/store/access.rs b/gix-ref/tests/refs/file/store/access.rs similarity index 100% rename from gix-ref/tests/file/store/access.rs rename to gix-ref/tests/refs/file/store/access.rs diff --git a/gix-ref/tests/file/store/find.rs b/gix-ref/tests/refs/file/store/find.rs similarity index 87% rename from gix-ref/tests/file/store/find.rs rename to gix-ref/tests/refs/file/store/find.rs index d4eb8dd0e88..f9cb0a26489 100644 --- a/gix-ref/tests/file/store/find.rs +++ b/gix-ref/tests/refs/file/store/find.rs @@ -2,12 +2,22 @@ mod existing { use crate::{file::store_at, hex_to_id}; #[test] - fn with_packed_refs() -> crate::Result { - let store = store_at("make_packed_ref_repository_for_overlay.sh")?; - let c1 = hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"); - let r = store.find("main")?; - assert_eq!(r.target.into_id(), c1); - assert_eq!(r.name.as_bstr(), "refs/heads/main"); + fn various_repositories() -> crate::Result { + 
for fixture in [ + "make_ref_repository.sh", + "make_packed_ref_repository.sh", + "make_packed_ref_repository_for_overlay.sh", + ] { + let store = store_at(fixture)?; + let c1 = hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"); + let r = store.find("main")?; + assert_eq!(r.target.into_id(), c1); + assert_eq!(r.name.as_bstr(), "refs/heads/main"); + let r = store + .find("A") + .unwrap_or_else(|_| panic!("{fixture}: should find capitalized refs")); + assert_eq!(r.target.into_id(), c1); + } Ok(()) } @@ -98,6 +108,17 @@ mod loose { use crate::file::store; + #[test] + fn capitalized_branch() -> crate::Result { + let store = store()?; + assert_eq!( + store.find("A")?.name.as_bstr(), + "refs/heads/A", + "capitalized loose refs can be found fine" + ); + Ok(()) + } + #[test] fn success_and_failure() -> crate::Result { let store = store()?; diff --git a/gix-ref/tests/file/store/iter.rs b/gix-ref/tests/refs/file/store/iter.rs similarity index 97% rename from gix-ref/tests/file/store/iter.rs rename to gix-ref/tests/refs/file/store/iter.rs index 900a15d56f0..046bdbc58b1 100644 --- a/gix-ref/tests/file/store/iter.rs +++ b/gix-ref/tests/refs/file/store/iter.rs @@ -253,7 +253,7 @@ fn no_packed_available_thus_no_iteration_possible() -> crate::Result { #[test] fn packed_file_iter() -> crate::Result { let store = store_with_packed_refs()?; - assert_eq!(store.open_packed_buffer()?.expect("pack available").iter()?.count(), 8); + assert_eq!(store.open_packed_buffer()?.expect("pack available").iter()?.count(), 9); Ok(()) } @@ -262,7 +262,7 @@ fn loose_iter_with_broken_refs() -> crate::Result { let store = store()?; let mut actual: Vec<_> = store.loose_iter()?.collect(); - assert_eq!(actual.len(), 15); + assert_eq!(actual.len(), 16); actual.sort_by_key(Result::is_err); let first_error = actual .iter() @@ -271,7 +271,7 @@ fn loose_iter_with_broken_refs() -> crate::Result { .expect("there is an error"); assert_eq!( - first_error, 14, + first_error, 15, "there is exactly one invalid 
item, and it didn't abort the iterator most importantly" ); #[cfg(not(windows))] @@ -291,6 +291,7 @@ fn loose_iter_with_broken_refs() -> crate::Result { ref_paths, vec![ "d1", + "heads/A", "heads/d1", "heads/dt1", "heads/main", @@ -343,6 +344,7 @@ fn loose_iter_with_prefix() -> crate::Result { assert_eq!( actual, vec![ + "refs/heads/A", "refs/heads/d1", "refs/heads/dt1", "refs/heads/main", @@ -394,7 +396,8 @@ fn overlay_iter() -> crate::Result { assert_eq!( ref_names, vec![ - (b"refs/heads/main".as_bstr().to_owned(), Object(c1)), + (b"refs/heads/A".as_bstr().to_owned(), Object(c1)), + (b"refs/heads/main".into(), Object(c1)), ("refs/heads/newer-as-loose".into(), Object(c2)), ( "refs/remotes/origin/HEAD".into(), @@ -440,7 +443,8 @@ fn overlay_prefixed_iter() -> crate::Result { assert_eq!( ref_names, vec![ - (b"refs/heads/main".as_bstr().to_owned(), Object(c1)), + (b"refs/heads/A".as_bstr().to_owned(), Object(c1)), + (b"refs/heads/main".into(), Object(c1)), ("refs/heads/newer-as-loose".into(), Object(c2)), ] ); diff --git a/gix-ref/tests/file/store/mod.rs b/gix-ref/tests/refs/file/store/mod.rs similarity index 100% rename from gix-ref/tests/file/store/mod.rs rename to gix-ref/tests/refs/file/store/mod.rs diff --git a/gix-ref/tests/file/store/reflog.rs b/gix-ref/tests/refs/file/store/reflog.rs similarity index 100% rename from gix-ref/tests/file/store/reflog.rs rename to gix-ref/tests/refs/file/store/reflog.rs diff --git a/gix-ref/tests/file/transaction/mod.rs b/gix-ref/tests/refs/file/transaction/mod.rs similarity index 100% rename from gix-ref/tests/file/transaction/mod.rs rename to gix-ref/tests/refs/file/transaction/mod.rs diff --git a/gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/collisions.rs b/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/collisions.rs similarity index 100% rename from gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/collisions.rs rename to 
gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/collisions.rs diff --git a/gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/mod.rs b/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/mod.rs similarity index 99% rename from gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/mod.rs rename to gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/mod.rs index dbdb5e4035c..f90d8cf03c4 100644 --- a/gix-ref/tests/file/transaction/prepare_and_commit/create_or_update/mod.rs +++ b/gix-ref/tests/refs/file/transaction/prepare_and_commit/create_or_update/mod.rs @@ -791,7 +791,7 @@ fn packed_refs_creation_with_packed_refs_mode_prune_removes_original_loose_refs( assert_eq!( edits.len(), - 8, + 9, "there are a certain amount of loose refs that are packed" ); diff --git a/gix-ref/tests/file/transaction/prepare_and_commit/delete.rs b/gix-ref/tests/refs/file/transaction/prepare_and_commit/delete.rs similarity index 100% rename from gix-ref/tests/file/transaction/prepare_and_commit/delete.rs rename to gix-ref/tests/refs/file/transaction/prepare_and_commit/delete.rs diff --git a/gix-ref/tests/file/worktree.rs b/gix-ref/tests/refs/file/worktree.rs similarity index 100% rename from gix-ref/tests/file/worktree.rs rename to gix-ref/tests/refs/file/worktree.rs diff --git a/gix-ref/tests/fullname/mod.rs b/gix-ref/tests/refs/fullname/mod.rs similarity index 100% rename from gix-ref/tests/fullname/mod.rs rename to gix-ref/tests/refs/fullname/mod.rs diff --git a/gix-ref/tests/refs.rs b/gix-ref/tests/refs/main.rs similarity index 100% rename from gix-ref/tests/refs.rs rename to gix-ref/tests/refs/main.rs diff --git a/gix-ref/tests/namespace/mod.rs b/gix-ref/tests/refs/namespace/mod.rs similarity index 100% rename from gix-ref/tests/namespace/mod.rs rename to gix-ref/tests/refs/namespace/mod.rs diff --git a/gix-ref/tests/packed/find.rs b/gix-ref/tests/refs/packed/find.rs similarity index 95% rename 
from gix-ref/tests/packed/find.rs rename to gix-ref/tests/refs/packed/find.rs index 9cc6753c6c4..20fb3f9a444 100644 --- a/gix-ref/tests/packed/find.rs +++ b/gix-ref/tests/refs/packed/find.rs @@ -14,6 +14,19 @@ fn a_lock_file_would_not_be_a_valid_partial_name() { assert_eq!(err.to_string(), "A reference must be a valid tag name as well"); } +#[test] +fn capitalized_branch() -> crate::Result { + let store = store_with_packed_refs()?; + let packed_refs = store.open_packed_buffer()?.expect("packed-refs exist"); + + assert_eq!( + packed_refs.find("A")?.name.as_bstr(), + "refs/heads/A", + "fully capitalized refs aren't just considered pseudorefs" + ); + Ok(()) +} + #[test] fn all_iterable_refs_can_be_found() -> crate::Result { let store = store_with_packed_refs()?; diff --git a/gix-ref/tests/packed/iter.rs b/gix-ref/tests/refs/packed/iter.rs similarity index 97% rename from gix-ref/tests/packed/iter.rs rename to gix-ref/tests/refs/packed/iter.rs index 28d15ab1ca3..b22c7c7adba 100644 --- a/gix-ref/tests/packed/iter.rs +++ b/gix-ref/tests/refs/packed/iter.rs @@ -18,7 +18,7 @@ fn packed_refs_with_header() -> crate::Result { let dir = gix_testtools::scripted_fixture_read_only_standalone("make_packed_ref_repository.sh")?; let buf = std::fs::read(dir.join(".git").join("packed-refs"))?; let iter = packed::Iter::new(&buf)?; - assert_eq!(iter.count(), 8, "it finds the right amount of items"); + assert_eq!(iter.count(), 9, "it finds the right amount of items"); Ok(()) } @@ -31,7 +31,8 @@ fn iter_prefix() -> crate::Result { .map(|r| r.map(|r| r.name.as_bstr())) .collect::, _>>()?, vec![ - "refs/heads/d1".as_bytes().as_bstr(), + "refs/heads/A".as_bytes().as_bstr(), + "refs/heads/d1".into(), "refs/heads/dt1".into(), "refs/heads/main".into() ] diff --git a/gix-ref/tests/packed/mod.rs b/gix-ref/tests/refs/packed/mod.rs similarity index 100% rename from gix-ref/tests/packed/mod.rs rename to gix-ref/tests/refs/packed/mod.rs diff --git a/gix-ref/tests/packed/open.rs 
b/gix-ref/tests/refs/packed/open.rs similarity index 100% rename from gix-ref/tests/packed/open.rs rename to gix-ref/tests/refs/packed/open.rs diff --git a/gix-ref/tests/reference/mod.rs b/gix-ref/tests/refs/reference/mod.rs similarity index 100% rename from gix-ref/tests/reference/mod.rs rename to gix-ref/tests/refs/reference/mod.rs diff --git a/gix-ref/tests/store/mod.rs b/gix-ref/tests/refs/store/mod.rs similarity index 100% rename from gix-ref/tests/store/mod.rs rename to gix-ref/tests/refs/store/mod.rs diff --git a/gix-ref/tests/transaction/mod.rs b/gix-ref/tests/refs/transaction/mod.rs similarity index 100% rename from gix-ref/tests/transaction/mod.rs rename to gix-ref/tests/refs/transaction/mod.rs diff --git a/gix/Cargo.toml b/gix/Cargo.toml index aa5f4efe20f..2afbf8d2b59 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -139,7 +139,7 @@ revparse-regex = ["regex", "revision"] blob-diff = ["gix-diff/blob", "attributes"] ## Add functions to specifically merge files, using the standard three-way merge that git offers. -blob-merge = ["dep:gix-merge", "gix-merge/blob", "attributes"] +blob-merge = ["blob-diff", "dep:gix-merge", "attributes"] ## Make it possible to turn a tree into a stream of bytes, which can be decoded to entries and turned into various other formats. 
worktree-stream = ["gix-worktree-stream", "attributes"] diff --git a/gix/src/config/tree/sections/diff.rs b/gix/src/config/tree/sections/diff.rs index f328254e8fd..7caa0b447a7 100644 --- a/gix/src/config/tree/sections/diff.rs +++ b/gix/src/config/tree/sections/diff.rs @@ -165,7 +165,7 @@ mod renames { } } -mod validate { +pub(super) mod validate { use crate::{ bstr::BStr, config::tree::{keys, Diff}, diff --git a/gix/src/config/tree/sections/merge.rs b/gix/src/config/tree/sections/merge.rs index 32eb833d84e..966dda16083 100644 --- a/gix/src/config/tree/sections/merge.rs +++ b/gix/src/config/tree/sections/merge.rs @@ -6,6 +6,17 @@ use crate::config::{ }; impl Merge { + /// The `merge.renameLimit` key. + pub const RENAME_LIMIT: keys::UnsignedInteger = keys::UnsignedInteger::new_unsigned_integer( + "renameLimit", + &config::Tree::MERGE, + ) + .with_note( + "The limit is actually squared, so 1000 stands for up to 1 million diffs if fuzzy rename tracking is enabled", + ); + /// The `merge.renames` key. 
+ #[cfg(feature = "blob-merge")] + pub const RENAMES: super::diff::Renames = super::diff::Renames::new_renames("renames", &config::Tree::MERGE); /// The `merge.renormalize` key pub const RENORMALIZE: keys::Boolean = keys::Boolean::new_boolean("renormalize", &Tree::MERGE); /// The `merge.default` key @@ -32,11 +43,16 @@ impl Section for Merge { fn keys(&self) -> &[&dyn Key] { &[ + &Self::RENAME_LIMIT, + #[cfg(feature = "blob-merge")] + &Self::RENAMES, &Self::RENORMALIZE, &Self::DEFAULT, &Self::DRIVER_NAME, &Self::DRIVER_COMMAND, &Self::DRIVER_RECURSIVE, + #[cfg(feature = "blob-merge")] + &Self::CONFLICT_STYLE, ] } } diff --git a/gix/src/diff.rs b/gix/src/diff.rs index 5308c5c3fbd..01361b1bbdd 100644 --- a/gix/src/diff.rs +++ b/gix/src/diff.rs @@ -122,7 +122,7 @@ pub mod rename { /// #[cfg(feature = "blob-diff")] -mod utils { +pub(crate) mod utils { use gix_diff::{rewrites::Copies, Rewrites}; use crate::{ @@ -172,10 +172,18 @@ mod utils { config: &gix_config::File<'static>, lenient: bool, ) -> Result, new_rewrites::Error> { - let key = "diff.renames"; + new_rewrites_inner(config, lenient, &Diff::RENAMES, &Diff::RENAME_LIMIT) + } + + pub fn new_rewrites_inner( + config: &gix_config::File<'static>, + lenient: bool, + renames: &'static crate::config::tree::diff::Renames, + rename_limit: &'static crate::config::tree::keys::UnsignedInteger, + ) -> Result, new_rewrites::Error> { let copies = match config - .boolean(key) - .map(|value| Diff::RENAMES.try_into_renames(value)) + .boolean(renames) + .map(|value| renames.try_into_renames(value)) .transpose() .with_leniency(lenient)? { @@ -191,8 +199,8 @@ mod utils { Ok(Rewrites { copies, limit: config - .integer("diff.renameLimit") - .map(|value| Diff::RENAME_LIMIT.try_into_usize(value)) + .integer(rename_limit) + .map(|value| rename_limit.try_into_usize(value)) .transpose() .with_leniency(lenient)? 
.unwrap_or(default.limit), diff --git a/gix/src/prelude.rs b/gix/src/prelude.rs index 8a3e01124c2..03954574a36 100644 --- a/gix/src/prelude.rs +++ b/gix/src/prelude.rs @@ -1,5 +1,5 @@ pub use gix_features::parallel::reduce::Finalize; -pub use gix_object::{Find, FindExt}; -pub use gix_odb::{Header, HeaderExt, Write}; +pub use gix_object::{Find, FindExt, Write}; +pub use gix_odb::{Header, HeaderExt}; pub use crate::ext::*; diff --git a/gix/src/repository/impls.rs b/gix/src/repository/impls.rs index 1dbf21fa2e0..f528cda2860 100644 --- a/gix/src/repository/impls.rs +++ b/gix/src/repository/impls.rs @@ -1,3 +1,6 @@ +use gix_object::Exists; +use std::ops::DerefMut; + impl Clone for crate::Repository { fn clone(&self) -> Self { let mut new = crate::Repository::from_refs_and_objects( @@ -93,3 +96,55 @@ impl From for crate::ThreadSafeRepository { } } } + +impl gix_object::Write for crate::Repository { + fn write(&self, object: &dyn gix_object::WriteTo) -> Result { + let mut buf = self.empty_reusable_buffer(); + object.write_to(buf.deref_mut())?; + self.write_buf(object.kind(), &buf) + } + + fn write_buf(&self, object: gix_object::Kind, from: &[u8]) -> Result { + let oid = gix_object::compute_hash(self.object_hash(), object, from); + if self.objects.exists(&oid) { + return Ok(oid); + } + self.objects.write_buf(object, from) + } + + fn write_stream( + &self, + kind: gix_object::Kind, + size: u64, + from: &mut dyn std::io::Read, + ) -> Result { + let mut buf = self.empty_reusable_buffer(); + let bytes = std::io::copy(from, buf.deref_mut())?; + if size != bytes { + return Err(format!("Found {bytes} bytes in stream, but had {size} bytes declared").into()); + } + self.write_buf(kind, &buf) + } +} + +impl gix_object::FindHeader for crate::Repository { + fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error> { + self.objects.try_header(id) + } +} + +impl gix_object::Find for crate::Repository { + fn try_find<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a 
mut Vec, + ) -> Result>, gix_object::find::Error> { + self.objects.try_find(id, buffer) + } +} + +impl gix_object::Exists for crate::Repository { + fn exists(&self, id: &gix_hash::oid) -> bool { + self.objects.exists(id) + } +} diff --git a/gix/src/repository/merge.rs b/gix/src/repository/merge.rs index 6b038489b02..7f41f23fd46 100644 --- a/gix/src/repository/merge.rs +++ b/gix/src/repository/merge.rs @@ -1,8 +1,9 @@ use crate::config::cache::util::ApplyLeniencyDefault; use crate::config::tree; -use crate::repository::{blob_merge_options, merge_resource_cache}; +use crate::repository::{blob_merge_options, merge_resource_cache, merge_trees, tree_merge_options}; use crate::Repository; use gix_merge::blob::builtin_driver::text; +use gix_object::Write; use std::borrow::Cow; /// Merge-utilities @@ -10,7 +11,7 @@ impl Repository { /// Create a resource cache that can hold the three resources needed for a three-way merge. `worktree_roots` /// determines which side of the merge is read from the worktree, or from which worktree. /// - /// The platform can be used to setup resources and finally perform a merge. + /// The platform can be used to set up resources and finally perform a merge among blobs. /// /// Note that the current index is used for attribute queries. pub fn merge_resource_cache( @@ -55,7 +56,8 @@ impl Repository { Ok(gix_merge::blob::Platform::new(filter, mode, attrs, drivers, options)) } - /// Return options for use with [`gix_merge::blob::PlatformRef::merge()`]. + /// Return options for use with [`gix_merge::blob::PlatformRef::merge()`], accessible through + /// [merge_resource_cache()](Self::merge_resource_cache). pub fn blob_merge_options(&self) -> Result { Ok(gix_merge::blob::platform::merge::Options { is_virtual_ancestor: false, @@ -74,9 +76,63 @@ impl Repository { }) .transpose()? 
.unwrap_or_default(), - marker_size: text::Conflict::DEFAULT_MARKER_SIZE, + marker_size: text::Conflict::DEFAULT_MARKER_SIZE.try_into().unwrap(), }, }, }) } + + /// Read all relevant configuration options to instantiate options for use in [`merge_trees()`](Self::merge_trees). + pub fn tree_merge_options(&self) -> Result { + Ok(gix_merge::tree::Options { + rewrites: crate::diff::utils::new_rewrites_inner( + &self.config.resolved, + self.config.lenient_config, + &tree::Merge::RENAMES, + &tree::Merge::RENAME_LIMIT, + )?, + blob_merge: self.blob_merge_options()?, + blob_merge_command_ctx: self.command_context()?, + fail_on_conflict: None, + marker_size_multiplier: 0, + symlink_conflicts: None, + allow_lossy_resolution: false, + }) + } + + /// Merge `our_tree` and `their_tree` together, assuming they have the same `ancestor_tree`, to yield a new tree + /// which is provided as [tree editor](gix_object::tree::Editor) to inspect and finalize results at will. + /// No change to the worktree or index is made, but objects may be written to the object database as merge results + /// are stored. + /// If these changes should not be observable outside of this instance, consider [enabling object memory](Self::with_object_memory). + /// + /// Note that `ancestor_tree` can be the [empty tree hash](gix_hash::ObjectId::empty_tree) to indicate no common ancestry. + /// + /// `labels` are typically chosen to identify the refs or names for `our_tree` and `their_tree` and `ancestor_tree` respectively. + /// + /// `options` should be initialized with [`tree_merge_options()`](Self::tree_merge_options()). + // TODO: Use `crate::merge::Options` here and add niceties such as setting the resolution strategy. 
+ pub fn merge_trees( + &self, + ancestor_tree: impl AsRef, + our_tree: impl AsRef, + their_tree: impl AsRef, + labels: gix_merge::blob::builtin_driver::text::Labels<'_>, + options: gix_merge::tree::Options, + ) -> Result, merge_trees::Error> { + let mut diff_cache = self.diff_resource_cache_for_tree_diff()?; + let mut blob_merge = self.merge_resource_cache(Default::default())?; + Ok(gix_merge::tree( + ancestor_tree.as_ref(), + our_tree.as_ref(), + their_tree.as_ref(), + labels, + self, + |buf| self.write_buf(gix_object::Kind::Blob, buf), + &mut Default::default(), + &mut diff_cache, + &mut blob_merge, + options, + )?) + } } diff --git a/gix/src/repository/mod.rs b/gix/src/repository/mod.rs index a7659a858de..8485b8d7cd5 100644 --- a/gix/src/repository/mod.rs +++ b/gix/src/repository/mod.rs @@ -112,6 +112,38 @@ pub mod merge_resource_cache { } } +/// +#[cfg(feature = "blob-merge")] +pub mod merge_trees { + /// The error returned by [Repository::merge_trees()](crate::Repository::merge_trees()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + MergeResourceCache(#[from] super::merge_resource_cache::Error), + #[error(transparent)] + DiffResourceCache(#[from] super::diff_resource_cache::Error), + #[error(transparent)] + TreeMerge(#[from] gix_merge::tree::Error), + } +} + +/// +#[cfg(feature = "blob-merge")] +pub mod tree_merge_options { + /// The error returned by [Repository::tree_merge_options()](crate::Repository::tree_merge_options()). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + BlobMergeOptions(#[from] super::blob_merge_options::Error), + #[error(transparent)] + RewritesConfig(#[from] crate::diff::new_rewrites::Error), + #[error(transparent)] + CommandContext(#[from] crate::config::command_context::Error), + } +} + /// #[cfg(feature = "blob-diff")] pub mod diff_resource_cache { diff --git a/gix/src/repository/object.rs b/gix/src/repository/object.rs index dcb999aa763..c0112ab1195 100644 --- a/gix/src/repository/object.rs +++ b/gix/src/repository/object.rs @@ -2,8 +2,8 @@ use std::ops::DerefMut; use gix_hash::ObjectId; -use gix_object::{Exists, Find, FindExt}; -use gix_odb::{Header, HeaderExt, Write}; +use gix_object::{Exists, Find, FindExt, Write}; +use gix_odb::{Header, HeaderExt}; use gix_ref::{ transaction::{LogChange, PreviousValue, RefLog}, FullName, diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 944c7c9022f..c7859a0938f 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -164,23 +164,42 @@ pub fn main() -> Result<()> { repository(Mode::Lenient)?, out, format, - resolve_with.map(|c| match c { - merge::ResolveWith::Union => { - gix::merge::blob::builtin_driver::text::Conflict::ResolveWithUnion - } - merge::ResolveWith::Ours => { - gix::merge::blob::builtin_driver::text::Conflict::ResolveWithOurs - } - merge::ResolveWith::Theirs => { - gix::merge::blob::builtin_driver::text::Conflict::ResolveWithTheirs - } - }), + resolve_with.map(Into::into), base, ours, theirs, ) }, ), + merge::SubCommands::Tree { + in_memory, + resolve_content_with, + ours, + base, + theirs, + } => prepare_and_run( + "merge-tree", + trace, + verbose, + progress, + progress_keep_open, + None, + move |_progress, out, err| { + core::repository::merge::tree( + repository(Mode::Lenient)?, + out, + err, + base, + ours, + theirs, + core::repository::merge::tree::Options { + format, + resolve_content_merge: 
resolve_content_with.map(Into::into), + in_memory, + }, + ) + }, + ), }, Subcommands::MergeBase(crate::plumbing::options::merge_base::Command { first, others }) => prepare_and_run( "merge-base", diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index b6b1b005f11..47f92b4034f 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -357,6 +357,16 @@ pub mod merge { Theirs, } + impl From for gix::merge::blob::builtin_driver::text::Conflict { + fn from(value: ResolveWith) -> Self { + match value { + ResolveWith::Union => gix::merge::blob::builtin_driver::text::Conflict::ResolveWithUnion, + ResolveWith::Ours => gix::merge::blob::builtin_driver::text::Conflict::ResolveWithOurs, + ResolveWith::Theirs => gix::merge::blob::builtin_driver::text::Conflict::ResolveWithTheirs, + } + } + } + #[derive(Debug, clap::Parser)] #[command(about = "perform merges of various kinds")] pub struct Platform { @@ -382,6 +392,28 @@ pub mod merge { #[clap(value_name = "OURS", value_parser = crate::shared::AsBString)] theirs: BString, }, + + /// Merge a tree by specifying ours, base and theirs, writing it to the object database. + Tree { + /// Keep all objects to be written in memory to avoid any disk IO. + /// + /// Note that the resulting tree won't be available or inspectable. + #[clap(long, short = 'm')] + in_memory: bool, + /// Decide how to resolve content conflicts. If unset, write conflict markers and fail. + #[clap(long, short = 'c')] + resolve_content_with: Option, + + /// A revspec to our treeish. + #[clap(value_name = "OURS", value_parser = crate::shared::AsBString)] + ours: BString, + /// A revspec to the base as treeish for both ours and theirs. + #[clap(value_name = "BASE", value_parser = crate::shared::AsBString)] + base: BString, + /// A revspec to their treeish. 
+ #[clap(value_name = "THEIRS", value_parser = crate::shared::AsBString)] + theirs: BString, + }, } } diff --git a/src/plumbing/progress.rs b/src/plumbing/progress.rs index 846837694f8..429fe81876e 100644 --- a/src/plumbing/progress.rs +++ b/src/plumbing/progress.rs @@ -29,7 +29,7 @@ impl Display for Usage { } InUse(deviation) => { if !deviation.is_empty() { - write!(f, "❗️❗️❗️{deviation}")?; + write!(f, "❗️{deviation}")?; } } } @@ -411,12 +411,12 @@ static GIT_CONFIG: &[Record] = &[ usage: Planned("Required for big monorepos, and typically used in conjunction with sparse indices") }, Record { - config: "merge.renameLimit", - usage: Planned("The same as 'diff.renameLimit'") + config: "merge.directoryRenames", + usage: NotPlanned("On demand") }, Record { - config: "merge.renames", - usage: Planned("The same as 'diff.renames'") + config: "merge.verbosity", + usage: NotApplicable("Only useful for Git CLI") }, Record { config: "status.renameLimit", @@ -431,7 +431,7 @@ static GIT_CONFIG: &[Record] = &[ usage: Planned("Currently we are likely to expose passwords in errors or in other places, and it's better to by default not do that") }, Record { - config: "diff.*.cachetextconv", + config: "diff.*.cacheTextConv", usage: NotPlanned("It seems to slow to do that, and persisting results to save a relatively cheap computation doesn't seem right") }, ];