Skip to content

Commit 0912e5f

Browse files
committed
Auto merge of rust-lang#123441 - saethlin:fixed-len-file-names, r=<try>
Stabilize the size of incr comp object file names The current implementation does not produce stable-length paths, and we create the paths in a way that makes our allocation behavior nondeterministic. I think `@eddyb` fixed a number of other cases like this in the past, and this PR fixes another one. Whether that actually matters I have no idea, but we still have bimodal behavior in rustc-perf and the non-uniformity in `find` and `ls` was bothering me. I've also removed the truncation of the mangled CGU names. Before this PR incr comp paths look like this: ``` target/debug/incremental/scratch-38izrrq90cex7/s-gux6gz0ow8-1ph76gg-ewe1xj434l26w9up5bedsojpd/261xgo1oqnd90ry5.o ``` And after, they look like this: ``` target/debug/incremental/scratch-035omutqbfkbw/s-gux6borni0-16r3v1j-6n64tmwqzchtgqzwwim5amuga/55v2re42sztc8je9bva6g8ft3.o ``` On the one hand, I'm sure this will break some people's builds because they're on Windows and only a few bytes from the path length limit. But if we're that seriously worried about the length of our file names, I have some other ideas on how to make them smaller. And last time I deleted some hash truncations from the compiler, there was a huge drop in the number of incremental compilation ICEs that were reported: rust-lang#110367
2 parents b4acbe4 + a6397f0 commit 0912e5f

File tree

4 files changed

+74
-26
lines changed

4 files changed

+74
-26
lines changed

compiler/rustc_data_structures/src/base_n.rs

+50
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::iter;
12
/// Converts unsigned integers into a string representation with some base.
23
/// Bases up to and including 36 can be used for case-insensitive things.
34
use std::str;
@@ -42,3 +43,52 @@ pub fn encode(n: u128, base: usize) -> String {
4243
push_str(n, base, &mut s);
4344
s
4445
}
46+
47+
/// Integer types that can be written as a fixed-length, case-insensitive (base 36) string.
///
/// This trait just lets us reserve the exact right amount of space when doing fixed-length
/// case-insensitive encoding. Add any impls you need.
pub trait Base36Encodable: Copy + Into<u128> {
    /// Number of digits emitted for every value of the implementing type.
    ///
    /// It must be large enough for the type's maximum value; smaller values are padded up to
    /// this length.
    fn encoded_len() -> usize;
}
52+
53+
impl Base36Encodable for u128 {
54+
fn encoded_len() -> usize {
55+
25
56+
}
57+
}
58+
59+
impl Base36Encodable for u64 {
60+
fn encoded_len() -> usize {
61+
13
62+
}
63+
}
64+
65+
impl Base36Encodable for u32 {
66+
fn encoded_len() -> usize {
67+
7
68+
}
69+
}
70+
71+
pub fn push_case_insensitive<N: Base36Encodable>(n: N, output: &mut String) {
72+
// SAFETY: We will only append ASCII bytes.
73+
let output = unsafe { output.as_mut_vec() };
74+
75+
// Add encoded_len '0's to the end of the String, that's the area we're going to write to.
76+
let prev_len = output.len();
77+
output.extend(iter::repeat(b'0').take(N::encoded_len()));
78+
let output = &mut output[prev_len..];
79+
80+
let base = CASE_INSENSITIVE as u128;
81+
let mut n: u128 = n.into();
82+
83+
for out in output.iter_mut().rev() {
84+
*out = BASE_64[(n % base) as usize];
85+
n /= base;
86+
}
87+
assert_eq!(n, 0);
88+
}
89+
90+
/// Encodes `n` as a fixed-length, case-insensitive string.
///
/// The result always has `N::encoded_len()` characters; see [`push_case_insensitive`] for the
/// digit order, the padding and the panic condition.
pub fn case_insensitive<N: Base36Encodable>(n: N) -> String {
    // The final length is known up front, so size the buffer once.
    let mut output = String::with_capacity(N::encoded_len());
    push_case_insensitive(n, &mut output);
    output
}

compiler/rustc_data_structures/src/base_n/tests.rs

+8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
use super::*;
22

3+
#[test]
fn limits() {
    // The largest value of every supported width must survive a round trip.
    assert_eq!(Ok(u128::MAX), u128::from_str_radix(&case_insensitive(u128::MAX), 36));
    assert_eq!(Ok(u64::MAX), u64::from_str_radix(&case_insensitive(u64::MAX), 36));
    assert_eq!(Ok(u32::MAX), u32::from_str_radix(&case_insensitive(u32::MAX), 36));

    // The encoding is fixed-length: the smallest value is as wide as the largest.
    assert_eq!(case_insensitive(0u128).len(), case_insensitive(u128::MAX).len());
    assert_eq!(case_insensitive(0u64).len(), case_insensitive(u64::MAX).len());
    assert_eq!(case_insensitive(0u32).len(), case_insensitive(u32::MAX).len());
}
9+
310
#[test]
411
fn test_encode() {
512
fn test(n: u128, base: usize) {
613
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
14+
assert_eq!(Ok(n), u128::from_str_radix(&case_insensitive(n), 36));
715
}
816

917
for base in 2..37 {

compiler/rustc_incremental/src/persist/fs.rs

+15-22
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
//! implemented.
105105
106106
use crate::errors;
107+
use rustc_data_structures::base_n::Base36Encodable;
107108
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
108109
use rustc_data_structures::svh::Svh;
109110
use rustc_data_structures::unord::{UnordMap, UnordSet};
@@ -329,28 +330,21 @@ pub fn finalize_session_directory(sess: &Session, svh: Option<Svh>) {
329330

330331
debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
331332

332-
let old_sub_dir_name = incr_comp_session_dir.file_name().unwrap().to_string_lossy();
333-
assert_no_characters_lost(&old_sub_dir_name);
333+
let mut sub_dir_name = incr_comp_session_dir.file_name().unwrap().to_string_lossy().to_string();
334+
assert_no_characters_lost(&sub_dir_name);
334335

335336
// Keep the 's-{timestamp}-{random-number}' prefix, but replace the
336337
// '-working' part with the SVH of the crate
337-
let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect();
338-
if dash_indices.len() != 3 {
339-
bug!(
340-
"Encountered incremental compilation session directory with \
341-
malformed name: {}",
342-
incr_comp_session_dir.display()
343-
)
344-
}
345-
346-
// State: "s-{timestamp}-{random-number}-"
347-
let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
338+
// We want to keep this: "s-{timestamp}-{random-number}-"
339+
sub_dir_name.truncate(2 + (u64::encoded_len() - 3) + 1 + u32::encoded_len() + 1);
340+
assert!(sub_dir_name.ends_with('-'));
341+
assert!(sub_dir_name.as_bytes().iter().filter(|b| **b == b'-').count() == 3);
348342

349343
// Append the svh
350-
base_n::push_str(svh.as_u128(), INT_ENCODE_BASE, &mut new_sub_dir_name);
344+
base_n::push_case_insensitive(svh.as_u128(), &mut sub_dir_name);
351345

352346
// Create the full path
353-
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
347+
let new_path = incr_comp_session_dir.parent().unwrap().join(&*sub_dir_name);
354348
debug!("finalize_session_directory() - new path: {}", new_path.display());
355349

356350
match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) {
@@ -446,11 +440,10 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
446440
let random_number = thread_rng().next_u32();
447441
debug!("generate_session_dir_path: random_number = {}", random_number);
448442

449-
let directory_name = format!(
450-
"s-{}-{}-working",
451-
timestamp,
452-
base_n::encode(random_number as u128, INT_ENCODE_BASE)
453-
);
443+
// Chop the first 3 characters off the timestamp. Those 3 bytes will be zero for a while.
444+
assert_eq!(&timestamp[..3], "000");
445+
let directory_name =
446+
format!("s-{}-{}-working", &timestamp[3..], base_n::case_insensitive(random_number));
454447
debug!("generate_session_dir_path: directory_name = {}", directory_name);
455448
let directory_path = crate_dir.join(directory_name);
456449
debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
@@ -582,7 +575,7 @@ fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime
582575
fn timestamp_to_string(timestamp: SystemTime) -> String {
583576
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
584577
let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
585-
base_n::encode(micros as u128, INT_ENCODE_BASE)
578+
base_n::case_insensitive(micros)
586579
}
587580

588581
fn string_to_timestamp(s: &str) -> Result<SystemTime, &'static str> {
@@ -613,7 +606,7 @@ fn crate_path(sess: &Session) -> PathBuf {
613606
sess.cfg_version,
614607
);
615608

616-
let stable_crate_id = base_n::encode(stable_crate_id.as_u64() as u128, INT_ENCODE_BASE);
609+
let stable_crate_id = base_n::case_insensitive(stable_crate_id.as_u64());
617610

618611
let crate_name = format!("{crate_name}-{stable_crate_id}");
619612
incr_dir.join(crate_name)

compiler/rustc_middle/src/mir/mono.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -335,13 +335,10 @@ impl<'tcx> CodegenUnit<'tcx> {
335335
}
336336

337337
pub fn mangle_name(human_readable_name: &str) -> String {
338-
// We generate a 80 bit hash from the name. This should be enough to
339-
// avoid collisions and is still reasonably short for filenames.
340338
let mut hasher = StableHasher::new();
341339
human_readable_name.hash(&mut hasher);
342340
let hash: Hash128 = hasher.finish();
343-
let hash = hash.as_u128() & ((1u128 << 80) - 1);
344-
base_n::encode(hash, base_n::CASE_INSENSITIVE)
341+
base_n::case_insensitive(hash.as_u128())
345342
}
346343

347344
pub fn compute_size_estimate(&mut self) {

0 commit comments

Comments
 (0)