Skip to content

Commit acf4bd7

Browse files
committed
Fix CI when codegen .so is clobbered
The workspace builds rustc_codegen_nvvm as a normal Cargo member. During every CUDA build script, cuda_builder re-builds that same crate through -Zcodegen-backend=. As a result, Cargo produces two identical copies of librustc_codegen_nvvm.so. cuda_builder then tried to copy the backend .so into target/codegen-backends while rustc_codegen_nvvm was still being linked, so it sometimes read a half-written file. Now cuda_builder still knows how to depend on rustc_codegen_nvvm, but that dependency lives behind a default feature. Inside the workspace we turn the default feature off so the backend is built exactly once as a regular workspace member. External users get the same old behaviour because the feature defaults to on. cuda_builder also no longer blindly assumes that the .so already exists. The build script is now more defensive.
1 parent 936a86d commit acf4bd7

File tree

9 files changed

+276
-29
lines changed

9 files changed

+276
-29
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,4 @@ opt-level = 3
3131

3232
[workspace.dependencies]
3333
cuda_std = { path = "crates/cuda_std" }
34-
cuda_builder = { path = "crates/cuda_builder" }
34+
cuda_builder = { path = "crates/cuda_builder", default-features = false }

crates/cuda_builder/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,12 @@ description = "Builder for easily building rustc_codegen_nvvm crates"
88
repository = "https://github.com/Rust-GPU/rust-cuda"
99
readme = "../../README.md"
1010

11+
[features]
12+
default = ["codegen-backend"]
13+
codegen-backend = ["rustc_codegen_nvvm"]
14+
1115
[dependencies]
12-
rustc_codegen_nvvm = { version = "0.3", path = "../rustc_codegen_nvvm" }
16+
rustc_codegen_nvvm = { version = "0.3", path = "../rustc_codegen_nvvm", optional = true }
1317
nvvm = { path = "../nvvm", version = "0.1" }
1418
serde = { version = "1.0.217", features = ["derive"] }
1519
serde_json = "1.0.138"

crates/cuda_builder/src/lib.rs

Lines changed: 264 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,22 @@ pub use nvvm::*;
44
use serde::Deserialize;
55
use std::{
66
borrow::Borrow,
7-
env, fmt,
7+
env,
8+
ffi::OsStr,
9+
fmt, fs,
810
path::{Path, PathBuf},
911
process::{Command, Stdio},
12+
thread,
13+
time::{Duration, SystemTime},
1014
};
1115

1216
#[derive(Debug)]
1317
#[non_exhaustive]
1418
pub enum CudaBuilderError {
1519
CratePathDoesntExist(PathBuf),
1620
FailedToCopyPtxFile(std::io::Error),
21+
FailedToPrepareCodegenBackend(std::io::Error),
22+
CodegenBackendDidNotStabilize,
1723
BuildFailed,
1824
}
1925

@@ -27,6 +33,12 @@ impl fmt::Display for CudaBuilderError {
2733
CudaBuilderError::FailedToCopyPtxFile(err) => {
2834
f.write_str(&format!("Failed to copy PTX file: {err:?}"))
2935
}
36+
CudaBuilderError::FailedToPrepareCodegenBackend(err) => f.write_str(&format!(
37+
"Failed to prepare rustc_codegen_nvvm backend: {err:?}"
38+
)),
39+
CudaBuilderError::CodegenBackendDidNotStabilize => f.write_str(
40+
"Failed to prepare rustc_codegen_nvvm backend: artifact never stabilized",
41+
),
3042
}
3143
}
3244
}
@@ -402,7 +414,7 @@ fn dylib_path() -> Vec<PathBuf> {
402414
}
403415
}
404416

405-
fn find_rustc_codegen_nvvm() -> PathBuf {
417+
fn find_rustc_codegen_nvvm() -> Option<PathBuf> {
406418
let filename = format!(
407419
"{}rustc_codegen_nvvm{}",
408420
env::consts::DLL_PREFIX,
@@ -411,10 +423,52 @@ fn find_rustc_codegen_nvvm() -> PathBuf {
411423
for mut path in dylib_path() {
412424
path.push(&filename);
413425
if path.is_file() {
414-
return path;
426+
return Some(path);
415427
}
416428
}
417-
panic!("Could not find {filename} in library path");
429+
430+
let profile = env::var("PROFILE").unwrap_or_else(|_| "debug".into());
431+
let workspace_target_dir = workspace_target_dir();
432+
let mut fallback_dirs = vec![
433+
workspace_target_dir.join(&profile),
434+
workspace_target_dir.join(&profile).join("deps"),
435+
workspace_target_dir.join("cuda-builder"),
436+
workspace_target_dir.join("cuda-builder").join(&profile),
437+
workspace_target_dir
438+
.join("cuda-builder")
439+
.join(&profile)
440+
.join("deps"),
441+
];
442+
443+
if let Some(target) = env::var_os("TARGET") {
444+
let target_dir = workspace_target_dir.join(target);
445+
fallback_dirs.push(target_dir.join(&profile));
446+
fallback_dirs.push(target_dir.join(&profile).join("deps"));
447+
}
448+
449+
fallback_dirs.push(workspace_target_dir.join("codegen-backends"));
450+
fallback_dirs.push(
451+
workspace_target_dir
452+
.join("cuda-builder")
453+
.join("host")
454+
.join(&profile),
455+
);
456+
fallback_dirs.push(
457+
workspace_target_dir
458+
.join("cuda-builder")
459+
.join("host")
460+
.join(&profile)
461+
.join("deps"),
462+
);
463+
464+
for dir in fallback_dirs {
465+
let candidate = dir.join(&filename);
466+
if candidate.is_file() {
467+
return Some(candidate);
468+
}
469+
}
470+
471+
None
418472
}
419473

420474
/// Joins strings together while ensuring none of the strings contain the separator.
@@ -429,7 +483,14 @@ fn join_checking_for_separators(strings: Vec<impl Borrow<str>>, sep: &str) -> St
429483
fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
430484
// see https://github.com/EmbarkStudios/rust-gpu/blob/main/crates/spirv-builder/src/lib.rs#L385-L392
431485
// on what this does
432-
let rustc_codegen_nvvm = find_rustc_codegen_nvvm();
486+
let workspace_target_dir = workspace_target_dir();
487+
let rustc_codegen_nvvm =
488+
prepare_rustc_codegen_nvvm(&workspace_target_dir).map_err(|err| match err {
489+
PrepareBackendError::Io(io_err) => {
490+
CudaBuilderError::FailedToPrepareCodegenBackend(io_err)
491+
}
492+
PrepareBackendError::DidNotStabilize => CudaBuilderError::CodegenBackendDidNotStabilize,
493+
})?;
433494

434495
let mut rustflags = vec![
435496
format!("-Zcodegen-backend={}", rustc_codegen_nvvm.display()),
@@ -519,32 +580,30 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
519580
// to avoid waiting on the same lock (which effectively dead-locks us).
520581
// This also helps with e.g. RLS, which uses `--target target/rls`,
521582
// so we'll have a separate `target/rls/cuda-builder` for it.
522-
if let (Ok(profile), Some(mut dir)) = (
523-
env::var("PROFILE"),
524-
env::var_os("OUT_DIR").map(PathBuf::from),
525-
) {
526-
// Strip `$profile/build/*/out`.
527-
if dir.ends_with("out")
528-
&& dir.pop()
529-
&& dir.pop()
530-
&& dir.ends_with("build")
531-
&& dir.pop()
532-
&& dir.ends_with(profile)
533-
&& dir.pop()
534-
{
535-
cargo.arg("--target-dir").arg(dir.join("cuda-builder"));
536-
}
537-
}
583+
cargo
584+
.arg("--target-dir")
585+
.arg(workspace_target_dir.join("cuda-builder"));
538586

539587
let arch = format!("{:?}0", builder.arch);
540588
cargo.env("CUDA_ARCH", arch.strip_prefix("Compute").unwrap());
541589

542590
let cargo_encoded_rustflags = join_checking_for_separators(rustflags, "\x1f");
543591

592+
let mut dylib_paths = dylib_path();
593+
if let Some(parent) = rustc_codegen_nvvm.parent() {
594+
if !dylib_paths.iter().any(|path| path == parent) {
595+
dylib_paths.insert(0, parent.to_path_buf());
596+
}
597+
}
598+
544599
let build = cargo
545600
.stderr(Stdio::inherit())
546601
.current_dir(&builder.path_to_crate)
547602
.env("CARGO_ENCODED_RUSTFLAGS", cargo_encoded_rustflags)
603+
.env(
604+
dylib_path_envvar(),
605+
env::join_paths(dylib_paths).expect("failed to join library paths"),
606+
)
548607
.output()
549608
.expect("failed to execute cargo build");
550609

@@ -560,6 +619,190 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
560619
}
561620
}
562621

622+
fn workspace_target_dir() -> PathBuf {
623+
if let Some(path) = env::var_os("CARGO_TARGET_DIR") {
624+
return PathBuf::from(path);
625+
}
626+
627+
if let Some(out_dir) = env::var_os("OUT_DIR") {
628+
let mut dir = PathBuf::from(out_dir);
629+
while dir.file_name().is_some() && dir.file_name() != Some(OsStr::new("target")) {
630+
if !dir.pop() {
631+
break;
632+
}
633+
}
634+
if dir.file_name() == Some(OsStr::new("target")) {
635+
return dir;
636+
}
637+
}
638+
639+
env::var_os("CARGO_MANIFEST_DIR")
640+
.map(PathBuf::from)
641+
.map(|path| path.join("target"))
642+
.unwrap_or_else(|| PathBuf::from("target"))
643+
}
644+
645+
enum PrepareBackendError {
646+
Io(std::io::Error),
647+
DidNotStabilize,
648+
}
649+
650+
fn wait_for_backend_path() -> Result<PathBuf, PrepareBackendError> {
651+
const MAX_ATTEMPTS: usize = 600;
652+
let sleep_duration = Duration::from_millis(100);
653+
654+
for _ in 0..MAX_ATTEMPTS {
655+
if let Some(path) = find_rustc_codegen_nvvm() {
656+
return Ok(path);
657+
}
658+
thread::sleep(sleep_duration);
659+
}
660+
661+
Err(PrepareBackendError::DidNotStabilize)
662+
}
663+
664+
fn wait_for_stable_backend(
665+
source: &Path,
666+
) -> Result<(u64, Option<SystemTime>), PrepareBackendError> {
667+
const MAX_ATTEMPTS: usize = 600;
668+
let sleep_duration = Duration::from_millis(100);
669+
670+
for _ in 0..MAX_ATTEMPTS {
671+
let first = match fs::metadata(source) {
672+
Ok(metadata) => metadata,
673+
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
674+
thread::sleep(sleep_duration);
675+
continue;
676+
}
677+
Err(err) => return Err(PrepareBackendError::Io(err)),
678+
};
679+
680+
if first.len() == 0 {
681+
thread::sleep(sleep_duration);
682+
continue;
683+
}
684+
685+
let first_modified = first.modified().ok();
686+
687+
thread::sleep(sleep_duration);
688+
689+
let second = match fs::metadata(source) {
690+
Ok(metadata) => metadata,
691+
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
692+
thread::sleep(sleep_duration);
693+
continue;
694+
}
695+
Err(err) => return Err(PrepareBackendError::Io(err)),
696+
};
697+
698+
if second.len() == 0 {
699+
thread::sleep(sleep_duration);
700+
continue;
701+
}
702+
703+
let second_modified = second.modified().ok();
704+
705+
if second.len() == first.len() && second_modified == first_modified {
706+
return Ok((second.len(), second_modified));
707+
}
708+
}
709+
710+
Err(PrepareBackendError::DidNotStabilize)
711+
}
712+
713+
fn prepare_rustc_codegen_nvvm(target_dir: &Path) -> Result<PathBuf, PrepareBackendError> {
714+
let source = wait_for_backend_path()?;
715+
716+
let destination_dir = target_dir.join("codegen-backends");
717+
fs::create_dir_all(&destination_dir).map_err(PrepareBackendError::Io)?;
718+
let filename = source
719+
.file_name()
720+
.expect("rustc_codegen_nvvm backend should have a filename")
721+
.to_owned();
722+
let destination = destination_dir.join(&filename);
723+
724+
const MAX_COPY_ATTEMPTS: usize = 8;
725+
let sleep_duration = Duration::from_millis(100);
726+
727+
for attempt in 0..MAX_COPY_ATTEMPTS {
728+
let (expected_size, expected_modified) = wait_for_stable_backend(&source)?;
729+
730+
let temp_path = destination_dir.join(format!(
731+
"{}.{}-{}.tmp",
732+
filename.to_string_lossy(),
733+
std::process::id(),
734+
attempt
735+
));
736+
737+
let _ = fs::remove_file(&temp_path);
738+
739+
fs::copy(&source, &temp_path).map_err(PrepareBackendError::Io)?;
740+
741+
let temp_size = match fs::metadata(&temp_path) {
742+
Ok(metadata) => metadata.len(),
743+
Err(err) => {
744+
let _ = fs::remove_file(&temp_path);
745+
return Err(PrepareBackendError::Io(err));
746+
}
747+
};
748+
749+
if temp_size != expected_size {
750+
let _ = fs::remove_file(&temp_path);
751+
thread::sleep(sleep_duration);
752+
continue;
753+
}
754+
755+
let current_source = match fs::metadata(&source) {
756+
Ok(metadata) => metadata,
757+
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
758+
let _ = fs::remove_file(&temp_path);
759+
thread::sleep(sleep_duration);
760+
continue;
761+
}
762+
Err(err) => {
763+
let _ = fs::remove_file(&temp_path);
764+
return Err(PrepareBackendError::Io(err));
765+
}
766+
};
767+
768+
if current_source.len() != expected_size
769+
|| current_source.modified().ok() != expected_modified
770+
{
771+
let _ = fs::remove_file(&temp_path);
772+
thread::sleep(sleep_duration);
773+
continue;
774+
}
775+
776+
if let Err(err) = fs::remove_file(&destination) {
777+
if err.kind() != std::io::ErrorKind::NotFound {
778+
let _ = fs::remove_file(&temp_path);
779+
return Err(PrepareBackendError::Io(err));
780+
}
781+
}
782+
783+
match fs::rename(&temp_path, &destination) {
784+
Ok(()) => return Ok(destination),
785+
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
786+
// Destination directory was removed after we created it; try again.
787+
let _ = fs::remove_file(&temp_path);
788+
fs::create_dir_all(&destination_dir).map_err(PrepareBackendError::Io)?;
789+
thread::sleep(sleep_duration);
790+
}
791+
Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
792+
let _ = fs::remove_file(&destination);
793+
fs::rename(&temp_path, &destination).map_err(PrepareBackendError::Io)?;
794+
return Ok(destination);
795+
}
796+
Err(err) => {
797+
let _ = fs::remove_file(&temp_path);
798+
return Err(PrepareBackendError::Io(err));
799+
}
800+
}
801+
}
802+
803+
Err(PrepareBackendError::DidNotStabilize)
804+
}
805+
563806
#[derive(Deserialize)]
564807
struct RustcOutput {
565808
reason: String,

crates/optix/examples/ex02_pipeline/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ anyhow = "1.0.44"
1313
device = { path = "./device" }
1414

1515
[build-dependencies]
16-
cuda_builder = { version = "0.3", path = "../../../cuda_builder" }
16+
cuda_builder = { workspace = true }

crates/optix/examples/ex04_mesh/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@ num-traits = "0.2.14"
1515
glam = { version = "0.30", features = ["cuda"] }
1616

1717
[build-dependencies]
18-
cuda_builder = { version = "0.3", path = "../../../cuda_builder" }
18+
cuda_builder = { workspace = true }

examples/cuda/gemm/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ ndarray-rand = "0.15.0"
1313
rand = "0.9"
1414

1515
[build-dependencies]
16-
cuda_builder = { path = "../../../crates/cuda_builder" }
16+
cuda_builder = { workspace = true }

examples/cuda/path_tracer/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@ sysinfo = "0.33.1"
2323
anyhow = "1.0.53"
2424

2525
[build-dependencies]
26-
cuda_builder = { version = "0.3", path = "../../../crates/cuda_builder" }
26+
cuda_builder = { workspace = true }

0 commit comments

Comments
 (0)