Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crate-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,7 @@ A git directory walk.
* [x] pathspec based filtering
* [ ] multi-threaded initialization of icase hash table is always used to accelerate index lookups, even if ignoreCase = false for performance
* [ ] special handling of submodules (for now, submodules or nested repositories are detected, but they can't be walked into naturally)
* [ ] accelerated walk with `untracked`-cache (as provided by `UNTR` extension of `gix_index::File`)
* [x] accelerated walk with `untracked`-cache (as provided by `UNTR` extension of `gix_index::File`)

### gix-index

Expand Down
6 changes: 6 additions & 0 deletions gix-dir/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ test = false
sha1 = ["gix-index/sha1"]
## Enable support for the SHA-256 hash by forwarding the feature to dependencies.
sha256 = ["gix-index/sha256"]
## Enable support for handling attributes, forwarding the feature to dependencies.
attributes = ["gix-worktree/attributes"]

[dependencies]
gix-trace = { version = "^0.1.19", path = "../gix-trace" }
Expand All @@ -36,6 +38,10 @@ gix-utils = { version = "^0.3.2", path = "../gix-utils", features = ["bstr"] }
bstr = { version = "1.12.0", default-features = false }
thiserror = "2.0.18"

[target.'cfg(unix)'.dependencies]
libc = { version = "0.2.182" }
rustix = { version = "1.1.2", default-features = false, features = ["std", "system"] }

[dev-dependencies]
gix-testtools = { path = "../tests/tools" }
gix-fs = { path = "../gix-fs" }
Expand Down
10 changes: 9 additions & 1 deletion gix-dir/src/walk/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use bstr::{BStr, BString, ByteSlice};

use crate::{
EntryRef, entry,
walk::{Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome, classify, readdir},
walk::{Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome, classify, readdir, untracked_cache},
};

/// A function to perform a git-style, unsorted, directory walk.
Expand Down Expand Up @@ -105,6 +105,10 @@ pub fn walk(
}

let mut state = readdir::State::new(worktree_root, ctx.current_dir, options.for_deletion.is_some());
let untracked_cache = options
.use_untracked_cache
.then(|| untracked_cache::validate(worktree_root, ctx.index, &ctx, options))
.flatten();
let may_collapse = root != worktree_root && state.may_collapse(&current);
let (action, _) = readdir::recursive(
may_collapse,
Expand All @@ -116,6 +120,10 @@ pub fn walk(
delegate,
&mut out,
&mut state,
untracked_cache.as_ref(),
untracked_cache
.as_ref()
.map(|cache: &untracked_cache::Validated<'_>| cache.root_dir()),
)?;
if action.is_continue() {
state.emit_remaining(may_collapse, options, &mut out, delegate);
Expand Down
32 changes: 31 additions & 1 deletion gix-dir/src/walk/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ pub enum ForDeletionMode {
}

/// Options for use in [`walk()`](function::walk()) function.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub struct Options<'a> {
/// If `true`, the filesystem will store paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`, which means that
/// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally
Expand Down Expand Up @@ -188,13 +188,36 @@ pub struct Options<'a> {
///
/// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`.
pub symlinks_to_directories_are_ignored_like_directories: bool,
/// If `true`, consult the untracked cache if it is present and otherwise applicable.
pub use_untracked_cache: bool,
/// A set of all git worktree checkouts that are located within the main worktree directory.
///
/// They will automatically be detected as 'tracked', but without providing index information (as there is no actual index entry).
/// Note that the unicode composition must match the `precompose_unicode` field so that paths will match verbatim.
pub worktree_relative_worktree_dirs: Option<&'a BTreeSet<BString>>,
}

impl Default for Options<'_> {
fn default() -> Self {
Self {
precompose_unicode: false,
ignore_case: false,
recurse_repositories: false,
emit_pruned: false,
emit_ignored: None,
for_deletion: None,
classify_untracked_bare_repositories: false,
emit_tracked: false,
emit_untracked: Default::default(),
emit_empty_directories: false,
emit_collapsed: None,
symlinks_to_directories_are_ignored_like_directories: false,
use_untracked_cache: true,
worktree_relative_worktree_dirs: None,
}
}
}

/// All information that is required to perform a dirwalk, and classify paths properly.
pub struct Context<'a> {
/// If not `None`, it will be checked before entering any directory to trigger early interruption.
Expand Down Expand Up @@ -269,6 +292,12 @@ pub struct Outcome {
pub returned_entries: usize,
/// The amount of entries, prior to pathspecs filtering them out or otherwise excluding them.
pub seen_entries: u32,
/// The number of directories whose contents were served entirely from the untracked cache,
/// avoiding a `read_dir` syscall.
pub untracked_cache_hits: u32,
/// The number of directories for which the untracked cache was consulted but failed its
/// per-directory stat validation, causing a fallback to a real `read_dir` call.
pub untracked_cache_misses: u32,
}

/// The error returned by [`walk()`](function::walk()).
Expand Down Expand Up @@ -306,3 +335,4 @@ pub enum Error {
mod classify;
pub(crate) mod function;
mod readdir;
mod untracked_cache;
179 changes: 179 additions & 0 deletions gix-dir/src/walk/readdir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,38 @@ pub(super) fn recursive(
delegate: &mut dyn Delegate,
out: &mut Outcome,
state: &mut State,
untracked_cache: Option<&walk::untracked_cache::Validated<'_>>,
untracked_cache_dir: Option<usize>,
) -> Result<(Action, bool), Error> {
if ctx.should_interrupt.is_some_and(|flag| flag.load(Ordering::Relaxed)) {
return Err(Error::Interrupted);
}
let cache_attempted = untracked_cache.zip(untracked_cache_dir);
let cache_valid = cache_attempted.filter(|(cache, dir)| cache.is_dir_valid(*dir, current));
if cache_attempted.is_some() && cache_valid.is_none() {
out.untracked_cache_misses += 1;
}
if let Some((action, prevent_collapse)) = cache_valid
.map(|(cache, dir)| {
recursive_from_untracked_cache(
dir,
may_collapse,
current,
current_bstr,
current_info,
ctx,
opts,
delegate,
out,
state,
cache,
)
})
.transpose()?
{
out.untracked_cache_hits += 1;
return Ok((action, prevent_collapse));
}
out.read_dir_calls += 1;
let entries = gix_fs::read_dir(current, opts.precompose_unicode).map_err(|err| Error::ReadDir {
path: current.to_owned(),
Expand Down Expand Up @@ -94,6 +122,15 @@ pub(super) fn recursive(
delegate,
out,
state,
untracked_cache,
untracked_cache_dir.and_then(|dir| {
untracked_cache.and_then(|cache| {
let component = current_bstr
.rfind_byte(b'/')
.map_or(current_bstr.as_bstr(), |pos| current_bstr[pos + 1..].as_bstr());
cache.child_dir(dir, component)
})
}),
)?;
prevent_collapse |= subdir_prevent_collapse;
if action.is_break() {
Expand Down Expand Up @@ -139,6 +176,148 @@ pub(super) fn recursive(
Ok((res, prevent_collapse))
}

#[allow(clippy::too_many_arguments)]
fn recursive_from_untracked_cache(
cache_dir: usize,
may_collapse: bool,
current: &mut PathBuf,
current_bstr: &mut BString,
current_info: classify::Outcome,
ctx: &mut Context<'_>,
opts: Options<'_>,
delegate: &mut dyn Delegate,
out: &mut Outcome,
state: &mut State,
untracked_cache: &walk::untracked_cache::Validated<'_>,
) -> Result<(Action, bool), Error> {
let Some(cached) = untracked_cache.directory(cache_dir) else {
return Ok((std::ops::ControlFlow::Continue(()), false));
};

let mut num_entries = 0;
let mark = state.mark(may_collapse);
let mut prevent_collapse = current_info.status == Status::Tracked;

// Build the set of sub-directory names so we can skip their `"<name>/"` entries in
// `untracked_entries` — those are handled (with proper stat validation) below.
let subdir_names: std::collections::HashSet<&[u8]> = cached
.sub_directories()
.iter()
.filter_map(|&i| untracked_cache.directory(i))
.map(|d| d.name().as_bytes())
.collect();

for &subdir_idx in cached.sub_directories() {
let Some(subdir) = untracked_cache.directory(subdir_idx) else {
continue;
};
let prev_len = current_bstr.len();
if prev_len != 0 {
current_bstr.push(b'/');
}
current_bstr.extend_from_slice(subdir.name());
current.push(gix_path::from_bstr(subdir.name()));

let info = classify::path(
current,
current_bstr,
if prev_len == 0 { 0 } else { prev_len + 1 },
Some(entry::Kind::Directory),
|| Some(entry::Kind::Directory),
opts,
ctx,
)?;
num_entries += 1;
if can_recurse(current_bstr.as_bstr(), info, opts.for_deletion, false, delegate) {
let subdir_may_collapse = state.may_collapse(current);
let (action, subdir_prevent_collapse) = recursive(
subdir_may_collapse,
current,
current_bstr,
info,
ctx,
opts,
delegate,
out,
state,
Some(untracked_cache),
Some(subdir_idx),
)?;
prevent_collapse |= subdir_prevent_collapse;
if action.is_break() {
return Ok((action, prevent_collapse));
}
} else if !state.held_for_directory_collapse(current_bstr.as_bstr(), info, &opts) {
let action = emit_entry(Cow::Borrowed(current_bstr.as_bstr()), info, None, opts, out, delegate);
if action.is_break() {
return Ok((action, prevent_collapse));
}
}
current_bstr.truncate(prev_len);
current.pop();
}

for file in cached.untracked_entries() {
// Git stores collapsed untracked directories in BOTH `sub_directories` AND as
Comment thread
AaronMoat marked this conversation as resolved.
// `"<name>/"` in `untracked_entries`. Skip the `untracked_entries` copy — the
// sub_directories loop above handles it (with proper per-directory stat
// validation via `recursive()`). Emitting from here would bypass the stat check
// and serve stale cache entries (e.g. if files inside were deleted).
let (file_name, is_collapsed_dir) = file
.as_slice()
.strip_suffix(b"/")
.map_or((file.as_slice(), false), |s| (s, true));
if is_collapsed_dir && subdir_names.contains(file_name) {
continue;
}

num_entries += 1;
let prev_len = current_bstr.len();
if prev_len != 0 {
current_bstr.push(b'/');
}
current_bstr.extend_from_slice(file_name);
current.push(gix_path::from_bstr(bstr::BStr::new(file_name)));
let current_path = current.clone();

let info = classify::path(
current,
current_bstr,
if prev_len == 0 { 0 } else { prev_len + 1 },
None,
|| {
std::fs::symlink_metadata(&current_path)
.ok()
.map(|ft| ft.file_type().into())
},
Comment on lines +281 to +292
opts,
ctx,
)?;
if !state.held_for_directory_collapse(current_bstr.as_bstr(), info, &opts) {
let action = emit_entry(Cow::Borrowed(current_bstr.as_bstr()), info, None, opts, out, delegate);
if action.is_break() {
return Ok((action, prevent_collapse));
}
}
current_bstr.truncate(prev_len);
current.pop();
}

let res = mark.reduce_held_entries(
num_entries,
state,
&mut prevent_collapse,
current,
current_bstr.as_bstr(),
current_info,
opts,
out,
ctx,
delegate,
);
Ok((res, prevent_collapse))
}

pub(super) struct State {
/// The entries to hold back until it's clear what to do with them.
pub on_hold: Vec<Entry>,
Expand Down
Loading
Loading