From dcf502a12322b4ab018ef5e2d92b3072c83db7f3 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Tue, 21 Jan 2025 20:46:38 -0500 Subject: [PATCH 1/3] test(package): remove dirty workspace manifest check Workspace manifest dirtiness is more complicated than the other cases. We will construct dedicated tests for it in following commits. --- tests/testsuite/package.rs | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/tests/testsuite/package.rs b/tests/testsuite/package.rs index ae1c5d71024..8d9b51481ad 100644 --- a/tests/testsuite/package.rs +++ b/tests/testsuite/package.rs @@ -1212,15 +1212,6 @@ fn vcs_status_check_for_each_workspace_member() { }); git::commit(&repo); - p.change_file( - "Cargo.toml", - r#" - [workspace] - members = ["isengard", "mordor"] - [workspace.package] - edition = "2021" - "#, - ); // Dirty file outside won't affect packaging. p.change_file("hobbit", "changed!"); p.change_file("mordor/src/lib.rs", "changed!"); @@ -1321,8 +1312,6 @@ fn dirty_file_outside_pkg_root_considered_dirty() { [workspace] members = ["isengard"] resolver = "2" - [workspace.package] - edition = "2015" "#, ) .file("lib.rs", r#"compile_error!("you shall not pass")"#) @@ -1333,7 +1322,7 @@ fn dirty_file_outside_pkg_root_considered_dirty() { r#" [package] name = "isengard" - edition.workspace = true + edition = "2021" homepage = "saruman" description = "saruman" license-file = "../LICENSE" @@ -1357,17 +1346,6 @@ fn dirty_file_outside_pkg_root_considered_dirty() { p.change_file("original-dir/file", "after"); // * Changes in files outside pkg root that `license-file`/`readme` point to p.change_file("LICENSE", "after"); - // * When workspace inheritance is involved and changed - p.change_file( - "Cargo.toml", - r#" - [workspace] - members = ["isengard"] - resolver = "2" - [workspace.package] - edition = "2021" - "#, - ); // Changes in files outside git workdir won't affect vcs status check p.change_file( &main_outside_pkg_root, @@ -1398,14 +1376,6 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d "#]]) .run(); - let cargo_toml = str![[r##" -... -[package] -edition = "2021" -... - -"##]]; - let f = File::open(&p.root().join("target/package/isengard-0.0.0.crate")).unwrap(); validate_crate_contents( f, @@ -1427,7 +1397,6 @@ edition = "2021" ("symlink-dir/file", str!["after"]), ("README.md", str!["after"]), ("LICENSE", str!["after"]), - ("Cargo.toml", cargo_toml), ], ); } From 37fe6a701a46b03e9f767189027f33edc9671279 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Tue, 21 Jan 2025 21:12:22 -0500 Subject: [PATCH 2/3] test(package): dirtiness check doesnt work for workspace inheritance This commits adds three tests to demonstrate that dirtiness check does not work for Cargo.toml involved with workspace inheritance. * One for basic check. If any change in workspace manifest could affect the inherited value, this should be caught as dirty. * One for broken workspace manifest in Git index. * The other is also for broken workspace manifest in Git index, but has no inherited field. --- tests/testsuite/package.rs | 263 +++++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) diff --git a/tests/testsuite/package.rs b/tests/testsuite/package.rs index 8d9b51481ad..64b34a22539 100644 --- a/tests/testsuite/package.rs +++ b/tests/testsuite/package.rs @@ -1401,6 +1401,269 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d ); } +#[cargo_test] +fn dirty_workspace_manifest() { + Package::new("dep", "1.0.0").publish(); + let (p, repo) = git::new_repo("foo", |p| { + p.file( + "Cargo.toml", + r#" + [workspace] + members = ["isengard"] + resolver = "2" + [workspace.package] + edition = "2015" + [workspace.dependencies] + dep = "1" + "#, + ) + .file( + "isengard/Cargo.toml", + r#" + [package] + name = "isengard" + edition.workspace = true + [dependencies] + dep = "1" + "#, + ) + .file("isengard/src/lib.rs", "") + }); + git::commit(&repo); + + // change in field not inherited by member wont be considered dirty + p.change_file( + "Cargo.toml", + r#" + [workspace] + members = ["isengard"] + resolver = "2" + [workspace.package] + edition = "2015" + [workspace.dependencies] + dep = "2" + "#, + ); + + p.cargo("package --workspace --no-verify --no-metadata") + .with_stderr_data(str![[r#" +[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard) +[UPDATING] `dummy-registry` index +[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) + +"#]]) + .run(); + + let cargo_toml = str![[r#" +... +[package] +edition = "2015" +... +[dependencies.dep] +version = "1" +... +"#]]; + + let f = File::open(&p.root().join("target/package/isengard-0.0.0.crate")).unwrap(); + validate_crate_contents( + f, + "isengard-0.0.0.crate", + &[ + ".cargo_vcs_info.json", + "Cargo.toml", + "Cargo.toml.orig", + "src/lib.rs", + "Cargo.lock", + ], + [("Cargo.toml", cargo_toml)], + ); + + // change in field inherited by member will be considered dirty + p.change_file( + "Cargo.toml", + r#" + [workspace] + members = ["isengard"] + resolver = "2" + [workspace.package] + edition = "2021" + [workspace.dependencies] + dep = "1" + "#, + ); + + p.cargo("package --workspace --no-verify --no-metadata") + .with_stderr_data(str![[r#" +[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard) +[UPDATING] `dummy-registry` index +[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) + +"#]]) + .run(); + + // --allow-dirty works + p.cargo("package --workspace --no-verify --no-metadata --allow-dirty") + .with_stderr_data(str![[r#" +[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard) +[UPDATING] `dummy-registry` index +[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) + +"#]]) + .run(); + + let cargo_toml = str![[r##" +... +[package] +edition = "2021" +... +[dependencies.dep] +version = "1" +... +"##]]; + + let f = File::open(&p.root().join("target/package/isengard-0.0.0.crate")).unwrap(); + validate_crate_contents( + f, + "isengard-0.0.0.crate", + &[ + ".cargo_vcs_info.json", + "Cargo.toml", + "Cargo.toml.orig", + "src/lib.rs", + "Cargo.lock", + ], + [("Cargo.toml", cargo_toml)], + ); +} + +#[cargo_test] +fn dirty_and_broken_workspace_manifest_with_inherited_fields() { + let (p, repo) = git::new_repo("foo", |p| { + p.file( + "Cargo.toml", + r#" + [workspace] + members = ["isengard"] + broken + "#, + ) + .file( + "isengard/Cargo.toml", + r#" + [package] + name = "isengard" + edition.workspace = true + "#, + ) + .file("isengard/src/lib.rs", "") + }); + git::commit(&repo); + + p.change_file( + "Cargo.toml", + r#" + [workspace] + members = ["isengard"] + resolver = "2" + [workspace.package] + edition = "2021" + "#, + ); + + p.cargo("package --workspace --no-verify --no-metadata") + .with_stderr_data(str![[r#" +[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard) +[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) + +"#]]) + .run(); + + let cargo_toml = str![[r#" +... +[package] +edition = "2021" +... +"#]]; + + let f = File::open(&p.root().join("target/package/isengard-0.0.0.crate")).unwrap(); + validate_crate_contents( + f, + "isengard-0.0.0.crate", + &[ + ".cargo_vcs_info.json", + "Cargo.toml", + "Cargo.toml.orig", + "src/lib.rs", + "Cargo.lock", + ], + [("Cargo.toml", cargo_toml)], + ); +} + +#[cargo_test] +fn dirty_and_broken_workspace_manifest_without_inherited_fields() { + let (p, repo) = git::new_repo("foo", |p| { + p.file( + "Cargo.toml", + r#" + [workspace] + members = ["isengard"] + broken + "#, + ) + .file( + "isengard/Cargo.toml", + r#" + [package] + name = "isengard" + edition = "2021" + "#, + ) + .file("isengard/src/lib.rs", "") + }); + git::commit(&repo); + + p.change_file( + "Cargo.toml", + r#" + [workspace] + members = ["isengard"] + resolver = "2" + [workspace.package] + edition = "2024" + "#, + ); + + p.cargo("package --workspace --no-verify --no-metadata") + .with_stderr_data(str![[r#" +[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard) +[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) + +"#]]) + .run(); + + let cargo_toml = str![[r#" +... +[package] +edition = "2021" +... +"#]]; + + let f = File::open(&p.root().join("target/package/isengard-0.0.0.crate")).unwrap(); + validate_crate_contents( + f, + "isengard-0.0.0.crate", + &[ + ".cargo_vcs_info.json", + "Cargo.toml", + "Cargo.toml.orig", + "src/lib.rs", + "Cargo.lock", + ], + [("Cargo.toml", cargo_toml)], + ); +} + #[cargo_test] fn issue_13695_allow_dirty_vcs_info() { let p = project() From 1a91bd3551c441161be35a25d9cdf40972ec78b1 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Tue, 21 Jan 2025 21:33:40 -0500 Subject: [PATCH 3/3] fix(package): detect dirty workspace manifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This addresses one of the corner cases of VCS dirtiness in #14967. > Workspace inheritance — like changing `workspace.package.edition` > to `2021` would actually make member Cargo.toml dirty, > but the current dirty status check doesn't capture that. The solution here is * Retrieve workspace manifest from Git index. * Use the ws manifest from index to normalize package manifest. * Compare the difference between normalized tomls from Git index and from Git working directory. The implementation here is a bit ugly, as it exposes some internals functions to `pub(crate)`. The current implementation also has performance issues. When the workspace contains lots of members and has a dirty workspace manifest: * It adds one extra manifest parsing and normalization for checking each member. * Parsing part can be cached for the entire workspace. However normalization cannot be skipped. * It adds two TOML serializations for checking each member. * If we derive `Eq` for manifest types, we might be able to skip serializations and instead just compare them. --- src/cargo/ops/cargo_package/mod.rs | 2 +- src/cargo/ops/cargo_package/vcs.rs | 161 +++++++++++++++++++++++++++-- src/cargo/util/toml/mod.rs | 24 ++--- tests/testsuite/package.rs | 21 +++- 4 files changed, 185 insertions(+), 23 deletions(-) diff --git a/src/cargo/ops/cargo_package/mod.rs b/src/cargo/ops/cargo_package/mod.rs index 6c920eec4e3..161316f7bf6 100644 --- a/src/cargo/ops/cargo_package/mod.rs +++ b/src/cargo/ops/cargo_package/mod.rs @@ -387,7 +387,7 @@ fn prepare_archive( let src_files = src.list_files(pkg)?; // Check (git) repository state, getting the current commit hash. - let vcs_info = vcs::check_repo_state(pkg, &src_files, gctx, &opts)?; + let vcs_info = vcs::check_repo_state(pkg, &src_files, ws, &opts)?; build_ar_list(ws, pkg, src_files, vcs_info) } diff --git a/src/cargo/ops/cargo_package/vcs.rs b/src/cargo/ops/cargo_package/vcs.rs index 04d7a9d5afd..4aa84fa0667 100644 --- a/src/cargo/ops/cargo_package/vcs.rs +++ b/src/cargo/ops/cargo_package/vcs.rs @@ -6,10 +6,13 @@ use std::path::PathBuf; use anyhow::Context as _; use cargo_util::paths; +use cargo_util_schemas::manifest::TomlManifest; use serde::Serialize; use tracing::debug; use crate::core::Package; +use crate::core::Workspace; +use crate::core::WorkspaceRootConfig; use crate::sources::PathEntry; use crate::CargoResult; use crate::GlobalContext; @@ -44,9 +47,10 @@ pub struct GitVcsInfo { pub fn check_repo_state( p: &Package, src_files: &[PathEntry], - gctx: &GlobalContext, + ws: &Workspace<'_>, opts: &PackageOpts<'_>, ) -> CargoResult> { + let gctx = ws.gctx(); let Ok(repo) = git2::Repository::discover(p.root()) else { gctx.shell().verbose(|shell| { shell.warn(format!("no (git) VCS found for `{}`", p.root().display())) @@ -105,7 +109,7 @@ pub fn check_repo_state( .and_then(|p| p.to_str()) .unwrap_or("") .replace("\\", "/"); - let Some(git) = git(p, gctx, src_files, &repo, &opts)? else { + let Some(git) = git(p, ws, src_files, &repo, &opts)? else { // If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning, // then don't generate the corresponding file in order to maintain consistency with past behavior. return Ok(None); @@ -163,11 +167,12 @@ fn warn_symlink_checked_out_as_plain_text_file( /// The real git status check starts from here. fn git( pkg: &Package, - gctx: &GlobalContext, + ws: &Workspace<'_>, src_files: &[PathEntry], repo: &git2::Repository, opts: &PackageOpts<'_>, ) -> CargoResult> { + let gctx = ws.gctx(); // This is a collection of any dirty or untracked files. This covers: // - new/modified/deleted/renamed/type change (index or worktree) // - untracked files (which are "new" worktree files) @@ -189,7 +194,7 @@ fn git( .iter() .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path))) .map(|p| p.as_ref()) - .chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter()) + .chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter()) .map(|path| { pathdiff::diff_paths(path, cwd) .as_ref() @@ -233,6 +238,7 @@ fn git( /// current package root, but still under the git workdir, affecting the /// final packaged `.crate` file. fn dirty_files_outside_pkg_root( + ws: &Workspace<'_>, pkg: &Package, repo: &git2::Repository, src_files: &[PathEntry], @@ -247,7 +253,7 @@ fn dirty_files_outside_pkg_root( .map(|path| paths::normalize_path(&pkg_root.join(path))) .collect(); - let mut dirty_symlinks = HashSet::new(); + let mut dirty_files = HashSet::new(); for rel_path in src_files .iter() .filter(|p| p.is_symlink_or_under_symlink()) @@ -259,10 +265,151 @@ fn dirty_files_outside_pkg_root( .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok()) { if repo.status_file(&rel_path)? != git2::Status::CURRENT { - dirty_symlinks.insert(workdir.join(rel_path)); + dirty_files.insert(workdir.join(rel_path)); } } - Ok(dirty_symlinks) + + if let Some(dirty_ws_manifest) = dirty_workspace_manifest(ws, pkg, repo)? { + dirty_files.insert(dirty_ws_manifest); + } + Ok(dirty_files) +} + +fn dirty_workspace_manifest( + ws: &Workspace<'_>, + pkg: &Package, + repo: &git2::Repository, +) -> CargoResult> { + let workdir = repo.workdir().unwrap(); + let ws_manifest_path = ws.root_manifest(); + if pkg.manifest_path() == ws_manifest_path { + // The workspace manifest is also the primary package manifest. + // Normal file statuc check should have covered it. + return Ok(None); + } + if paths::strip_prefix_canonical(ws_manifest_path, pkg.root()).is_ok() { + // Inside package root. Don't bother checking git status. + return Ok(None); + } + let Ok(rel_path) = paths::strip_prefix_canonical(ws_manifest_path, workdir) else { + // Completely outside this git workdir. + return Ok(None); + }; + + // Outside package root but under git workdir. + if repo.status_file(&rel_path)? == git2::Status::CURRENT { + return Ok(None); + } + + let from_index = ws_manifest_and_root_config_from_index(ws, repo, &rel_path); + // If there is no workable workspace manifest in Git index, + // create a default inheritable fields. + // With it, we can detect any member manifest has inherited fields, + // and then the workspace manifest should be considered dirty. + let inheritable = if let Some(fields) = from_index + .as_ref() + .map(|(_, root_config)| root_config.inheritable()) + { + fields + } else { + &Default::default() + }; + + let empty = Vec::new(); + let cargo_features = crate::core::Features::new( + from_index + .as_ref() + .and_then(|(manifest, _)| manifest.cargo_features.as_ref()) + .unwrap_or(&empty), + ws.gctx(), + &mut Default::default(), + pkg.package_id().source_id().is_path(), + ) + .unwrap_or_default(); + + let dirty_path = || Ok(Some(workdir.join(&rel_path))); + let dirty = |msg| { + debug!( + "{msg} for `{}` of repo at `{}`", + rel_path.display(), + workdir.display(), + ); + dirty_path() + }; + + let Ok(normalized_toml) = crate::util::toml::normalize_toml( + pkg.manifest().original_toml(), + &cargo_features, + &|| Ok(inheritable), + pkg.manifest_path(), + ws.gctx(), + &mut Default::default(), + &mut Default::default(), + ) else { + return dirty("failed to normalize pkg manifest from index"); + }; + + let Ok(from_index) = toml::to_string_pretty(&normalized_toml) else { + return dirty("failed to serialize pkg manifest from index"); + }; + + let Ok(from_working_dir) = toml::to_string_pretty(pkg.manifest().normalized_toml()) else { + return dirty("failed to serialize pkg manifest from working directory"); + }; + + if from_index != from_working_dir { + tracing::trace!("--- from index ---\n{from_index}"); + tracing::trace!("--- from working dir ---\n{from_working_dir}"); + return dirty("normalized manifests from index and in working directory mismatched"); + } + + Ok(None) +} + +/// Gets workspace manifest and workspace root config from Git index. +/// +/// This returns an `Option` because workspace manifest might be broken or not +/// exist at all. +fn ws_manifest_and_root_config_from_index( + ws: &Workspace<'_>, + repo: &git2::Repository, + ws_manifest_rel_path: &Path, +) -> Option<(TomlManifest, WorkspaceRootConfig)> { + let workdir = repo.workdir().unwrap(); + let dirty = |msg| { + debug!( + "{msg} for `{}` of repo at `{}`", + ws_manifest_rel_path.display(), + workdir.display(), + ); + None + }; + let Ok(index) = repo.index() else { + debug!("no index for repo at `{}`", workdir.display()); + return None; + }; + let Some(entry) = index.get_path(ws_manifest_rel_path, 0) else { + return dirty("workspace manifest not found"); + }; + let Ok(blob) = repo.find_blob(entry.id) else { + return dirty("failed to find manifest blob"); + }; + let Ok(contents) = String::from_utf8(blob.content().to_vec()) else { + return dirty("failed parse as UTF-8 encoding"); + }; + let Ok(document) = crate::util::toml::parse_document(&contents) else { + return dirty("failed to parse file"); + }; + let Ok(ws_manifest_from_index) = crate::util::toml::deserialize_toml(&document) else { + return dirty("failed to deserialize doc"); + }; + let Some(toml_workspace) = ws_manifest_from_index.workspace.as_ref() else { + return dirty("not a workspace manifest"); + }; + + let ws_root_config = + crate::util::toml::to_workspace_root_config(toml_workspace, ws.root_manifest()); + Some((ws_manifest_from_index, ws_root_config)) } /// Helper to collect dirty statuses for a single repo. diff --git a/src/cargo/util/toml/mod.rs b/src/cargo/util/toml/mod.rs index 6ab5932d1ad..32b9e2c4cdd 100644 --- a/src/cargo/util/toml/mod.rs +++ b/src/cargo/util/toml/mod.rs @@ -84,10 +84,14 @@ pub fn read_manifest( .borrow_mut() .insert(package_root.to_owned(), ws_root_config.clone()); } + let inherit_cell = LazyCell::new(); + let inherit = || { + inherit_cell.try_borrow_with(|| load_inheritable_fields(gctx, path, &workspace_config)) + }; let normalized_toml = normalize_toml( &original_toml, &features, - &workspace_config, + &inherit, path, gctx, &mut warnings, @@ -158,12 +162,14 @@ fn read_toml_string(path: &Path, gctx: &GlobalContext) -> CargoResult { } #[tracing::instrument(skip_all)] -fn parse_document(contents: &str) -> Result, toml_edit::de::Error> { +pub(crate) fn parse_document( + contents: &str, +) -> Result, toml_edit::de::Error> { toml_edit::ImDocument::parse(contents.to_owned()).map_err(Into::into) } #[tracing::instrument(skip_all)] -fn deserialize_toml( +pub(crate) fn deserialize_toml( document: &toml_edit::ImDocument, ) -> Result { let mut unused = BTreeSet::new(); @@ -242,7 +248,7 @@ fn to_workspace_config( Ok(workspace_config) } -fn to_workspace_root_config( +pub(crate) fn to_workspace_root_config( normalized_toml: &manifest::TomlWorkspace, manifest_file: &Path, ) -> WorkspaceRootConfig { @@ -266,22 +272,16 @@ fn to_workspace_root_config( /// See [`Manifest::normalized_toml`] for more details #[tracing::instrument(skip_all)] -fn normalize_toml( +pub(crate) fn normalize_toml<'a>( original_toml: &manifest::TomlManifest, features: &Features, - workspace_config: &WorkspaceConfig, + inherit: &dyn Fn() -> CargoResult<&'a InheritableFields>, manifest_file: &Path, gctx: &GlobalContext, warnings: &mut Vec, errors: &mut Vec, ) -> CargoResult { let package_root = manifest_file.parent().unwrap(); - - let inherit_cell: LazyCell = LazyCell::new(); - let inherit = || { - inherit_cell - .try_borrow_with(|| load_inheritable_fields(gctx, manifest_file, &workspace_config)) - }; let workspace_root = || inherit().map(|fields| fields.ws_root().as_path()); let mut normalized_toml = manifest::TomlManifest { diff --git a/tests/testsuite/package.rs b/tests/testsuite/package.rs index 64b34a22539..2516488efa9 100644 --- a/tests/testsuite/package.rs +++ b/tests/testsuite/package.rs @@ -1493,10 +1493,13 @@ version = "1" ); p.cargo("package --workspace --no-verify --no-metadata") + .with_status(101) .with_stderr_data(str![[r#" -[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard) -[UPDATING] `dummy-registry` index -[PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) +[ERROR] 1 files in the working directory contain changes that were not yet committed into git: + +Cargo.toml + +to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag "#]]) .run(); @@ -1571,6 +1574,18 @@ fn dirty_and_broken_workspace_manifest_with_inherited_fields() { ); p.cargo("package --workspace --no-verify --no-metadata") + .with_status(101) + .with_stderr_data(str![[r#" +[ERROR] 1 files in the working directory contain changes that were not yet committed into git: + +Cargo.toml + +to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag + +"#]]) + .run(); + + p.cargo("package --workspace --no-verify --no-metadata --allow-dirty") .with_stderr_data(str![[r#" [PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard) [PACKAGED] 5 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)