Skip to content

Commit 6f4a901

Browse files
committed
Stabilize the size of incr comp object file names
1 parent ff24ef9 commit 6f4a901

File tree

9 files changed

+142
-70
lines changed

9 files changed

+142
-70
lines changed

compiler/rustc_codegen_gcc/src/context.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ use gccjit::{
66
use rustc_codegen_ssa::base::wants_msvc_seh;
77
use rustc_codegen_ssa::errors as ssa_errors;
88
use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, MiscMethods};
9-
use rustc_data_structures::base_n;
9+
use rustc_data_structures::base_n::ToBaseN;
10+
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
1011
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
1112
use rustc_middle::mir::mono::CodegenUnit;
1213
use rustc_middle::span_bug;
@@ -615,7 +616,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> {
615616
let mut name = String::with_capacity(prefix.len() + 6);
616617
name.push_str(prefix);
617618
name.push_str(".");
618-
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
619+
name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
619620
name
620621
}
621622
}

compiler/rustc_codegen_llvm/src/context.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ use crate::value::Value;
1111
use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
1212
use rustc_codegen_ssa::errors as ssa_errors;
1313
use rustc_codegen_ssa::traits::*;
14-
use rustc_data_structures::base_n;
14+
use rustc_data_structures::base_n::ToBaseN;
15+
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
1516
use rustc_data_structures::fx::FxHashMap;
1617
use rustc_data_structures::small_c_str::SmallCStr;
1718
use rustc_hir::def_id::DefId;
@@ -999,7 +1000,7 @@ impl CodegenCx<'_, '_> {
9991000
let mut name = String::with_capacity(prefix.len() + 6);
10001001
name.push_str(prefix);
10011002
name.push('.');
1002-
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
1003+
name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
10031004
name
10041005
}
10051006
}
+90-24
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/// Converts unsigned integers into a string representation with some base.
22
/// Bases up to and including 36 can be used for case-insensitive things.
3-
use std::str;
3+
use std::ascii;
4+
use std::fmt;
45

56
#[cfg(test)]
67
mod tests;
@@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64;
910
pub const ALPHANUMERIC_ONLY: usize = 62;
1011
pub const CASE_INSENSITIVE: usize = 36;
1112

12-
const BASE_64: &[u8; MAX_BASE] =
13-
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
13+
const BASE_64: [ascii::Char; MAX_BASE] = {
14+
let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
15+
let Some(ascii) = bytes.as_ascii() else { panic!() };
16+
*ascii
17+
};
1418

15-
#[inline]
16-
pub fn push_str(mut n: u128, base: usize, output: &mut String) {
17-
debug_assert!(base >= 2 && base <= MAX_BASE);
18-
let mut s = [0u8; 128];
19-
let mut index = s.len();
19+
pub struct BaseNString {
20+
start: usize,
21+
buf: [ascii::Char; 128],
22+
}
23+
24+
impl std::ops::Deref for BaseNString {
25+
type Target = str;
2026

21-
let base = base as u128;
27+
fn deref(&self) -> &str {
28+
self.buf[self.start..].as_str()
29+
}
30+
}
31+
32+
impl AsRef<str> for BaseNString {
33+
fn as_ref(&self) -> &str {
34+
self.buf[self.start..].as_str()
35+
}
36+
}
37+
38+
impl fmt::Display for BaseNString {
39+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40+
f.write_str(self)
41+
}
42+
}
43+
44+
// This trait just lets us reserve the exact right amount of space when doing fixed-length
45+
// case-insensitve encoding. Add any impls you need.
46+
pub trait ToBaseN: Into<u128> {
47+
fn encoded_len(base: usize) -> usize;
48+
49+
fn to_base_fixed_len(self, base: usize) -> BaseNString {
50+
let mut encoded = self.to_base(base);
51+
encoded.start = encoded.buf.len() - Self::encoded_len(base);
52+
encoded
53+
}
2254

23-
loop {
24-
index -= 1;
25-
s[index] = BASE_64[(n % base) as usize];
26-
n /= base;
55+
fn to_base(self, base: usize) -> BaseNString {
56+
let mut output = [ascii::Char::Digit0; 128];
2757

28-
if n == 0 {
29-
break;
58+
let mut n: u128 = self.into();
59+
60+
let mut index = output.len();
61+
loop {
62+
index -= 1;
63+
output[index] = BASE_64[(n % base as u128) as usize];
64+
n /= base as u128;
65+
66+
if n == 0 {
67+
break;
68+
}
69+
}
70+
assert_eq!(n, 0);
71+
72+
BaseNString { start: index, buf: output }
73+
}
74+
}
75+
76+
impl ToBaseN for u128 {
77+
fn encoded_len(base: usize) -> usize {
78+
let mut max = u128::MAX;
79+
let mut len = 0;
80+
while max > 0 {
81+
len += 1;
82+
max /= base as u128;
3083
}
84+
len
3185
}
86+
}
3287

33-
output.push_str(unsafe {
34-
// SAFETY: `s` is populated using only valid utf8 characters from `BASE_64`
35-
str::from_utf8_unchecked(&s[index..])
36-
});
88+
impl ToBaseN for u64 {
89+
fn encoded_len(base: usize) -> usize {
90+
let mut max = u64::MAX;
91+
let mut len = 0;
92+
while max > 0 {
93+
len += 1;
94+
max /= base as u64;
95+
}
96+
len
97+
}
3798
}
3899

39-
#[inline]
40-
pub fn encode(n: u128, base: usize) -> String {
41-
let mut s = String::new();
42-
push_str(n, base, &mut s);
43-
s
100+
impl ToBaseN for u32 {
101+
fn encoded_len(base: usize) -> usize {
102+
let mut max = u32::MAX;
103+
let mut len = 0;
104+
while max > 0 {
105+
len += 1;
106+
max /= base as u32;
107+
}
108+
len
109+
}
44110
}

compiler/rustc_data_structures/src/base_n/tests.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
use super::*;
22

33
#[test]
4-
fn test_encode() {
4+
fn limits() {
5+
assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36));
6+
assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36));
7+
assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36));
8+
}
9+
10+
#[test]
11+
fn test_to_base() {
512
fn test(n: u128, base: usize) {
6-
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
13+
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32));
14+
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32));
715
}
816

917
for base in 2..37 {

compiler/rustc_data_structures/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#![doc(rust_logo)]
1717
#![feature(allocator_api)]
1818
#![feature(array_windows)]
19+
#![feature(ascii_char)]
20+
#![feature(ascii_char_variants)]
1921
#![feature(auto_traits)]
2022
#![feature(cfg_match)]
2123
#![feature(core_intrinsics)]

compiler/rustc_incremental/src/persist/fs.rs

+24-29
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,14 @@
104104
//! implemented.
105105
106106
use crate::errors;
107+
use rustc_data_structures::base_n;
108+
use rustc_data_structures::base_n::BaseNString;
109+
use rustc_data_structures::base_n::ToBaseN;
110+
use rustc_data_structures::base_n::CASE_INSENSITIVE;
111+
use rustc_data_structures::flock;
107112
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
108113
use rustc_data_structures::svh::Svh;
109114
use rustc_data_structures::unord::{UnordMap, UnordSet};
110-
use rustc_data_structures::{base_n, flock};
111115
use rustc_errors::ErrorGuaranteed;
112116
use rustc_fs_util::{link_or_copy, try_canonicalize, LinkOrCopy};
113117
use rustc_session::config::CrateType;
@@ -329,28 +333,20 @@ pub fn finalize_session_directory(sess: &Session, svh: Option<Svh>) {
329333

330334
debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
331335

332-
let old_sub_dir_name = incr_comp_session_dir.file_name().unwrap().to_string_lossy();
333-
assert_no_characters_lost(&old_sub_dir_name);
336+
let mut sub_dir_name = incr_comp_session_dir.file_name().unwrap().to_string_lossy().to_string();
337+
assert_no_characters_lost(&sub_dir_name);
334338

335-
// Keep the 's-{timestamp}-{random-number}' prefix, but replace the
336-
// '-working' part with the SVH of the crate
337-
let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect();
338-
if dash_indices.len() != 3 {
339-
bug!(
340-
"Encountered incremental compilation session directory with \
341-
malformed name: {}",
342-
incr_comp_session_dir.display()
343-
)
344-
}
345-
346-
// State: "s-{timestamp}-{random-number}-"
347-
let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
339+
// Keep the 's-{timestamp}-{random-number}' prefix, but replace "working" with the SVH of the crate
340+
sub_dir_name.truncate(sub_dir_name.len() - "working".len());
341+
// Double-check that we kept this: "s-{timestamp}-{random-number}-"
342+
assert!(sub_dir_name.ends_with('-'), "{:?}", sub_dir_name);
343+
assert!(sub_dir_name.as_bytes().iter().filter(|b| **b == b'-').count() == 3);
348344

349-
// Append the svh
350-
base_n::push_str(svh.as_u128(), INT_ENCODE_BASE, &mut new_sub_dir_name);
345+
// Append the SVH
346+
sub_dir_name.push_str(&svh.as_u128().to_base_fixed_len(CASE_INSENSITIVE));
351347

352348
// Create the full path
353-
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
349+
let new_path = incr_comp_session_dir.parent().unwrap().join(&*sub_dir_name);
354350
debug!("finalize_session_directory() - new path: {}", new_path.display());
355351

356352
match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) {
@@ -446,11 +442,11 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
446442
let random_number = thread_rng().next_u32();
447443
debug!("generate_session_dir_path: random_number = {}", random_number);
448444

449-
let directory_name = format!(
450-
"s-{}-{}-working",
451-
timestamp,
452-
base_n::encode(random_number as u128, INT_ENCODE_BASE)
453-
);
445+
// Chop the first 3 characters off the timestamp. Those 3 bytes will be zero for a while.
446+
let (zeroes, timestamp) = timestamp.split_at(3);
447+
assert_eq!(zeroes, "000");
448+
let directory_name =
449+
format!("s-{}-{}-working", timestamp, random_number.to_base_fixed_len(CASE_INSENSITIVE));
454450
debug!("generate_session_dir_path: directory_name = {}", directory_name);
455451
let directory_path = crate_dir.join(directory_name);
456452
debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
@@ -579,10 +575,10 @@ fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime
579575
string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]])
580576
}
581577

582-
fn timestamp_to_string(timestamp: SystemTime) -> String {
578+
fn timestamp_to_string(timestamp: SystemTime) -> BaseNString {
583579
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
584580
let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
585-
base_n::encode(micros as u128, INT_ENCODE_BASE)
581+
micros.to_base_fixed_len(CASE_INSENSITIVE)
586582
}
587583

588584
fn string_to_timestamp(s: &str) -> Result<SystemTime, &'static str> {
@@ -613,9 +609,8 @@ fn crate_path(sess: &Session) -> PathBuf {
613609
sess.cfg_version,
614610
);
615611

616-
let stable_crate_id = base_n::encode(stable_crate_id.as_u64() as u128, INT_ENCODE_BASE);
617-
618-
let crate_name = format!("{crate_name}-{stable_crate_id}");
612+
let crate_name =
613+
format!("{crate_name}-{}", stable_crate_id.as_u64().to_base_fixed_len(CASE_INSENSITIVE));
619614
incr_dir.join(crate_name)
620615
}
621616

compiler/rustc_middle/src/mir/mono.rs

+5-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
22
use crate::ty::{GenericArgs, Instance, InstanceDef, SymbolName, TyCtxt};
33
use rustc_attr::InlineAttr;
4-
use rustc_data_structures::base_n;
4+
use rustc_data_structures::base_n::BaseNString;
5+
use rustc_data_structures::base_n::ToBaseN;
6+
use rustc_data_structures::base_n::CASE_INSENSITIVE;
57
use rustc_data_structures::fingerprint::Fingerprint;
68
use rustc_data_structures::fx::FxHashMap;
79
use rustc_data_structures::fx::FxIndexMap;
@@ -334,14 +336,11 @@ impl<'tcx> CodegenUnit<'tcx> {
334336
self.is_code_coverage_dead_code_cgu = true;
335337
}
336338

337-
pub fn mangle_name(human_readable_name: &str) -> String {
338-
// We generate a 80 bit hash from the name. This should be enough to
339-
// avoid collisions and is still reasonably short for filenames.
339+
pub fn mangle_name(human_readable_name: &str) -> BaseNString {
340340
let mut hasher = StableHasher::new();
341341
human_readable_name.hash(&mut hasher);
342342
let hash: Hash128 = hasher.finish();
343-
let hash = hash.as_u128() & ((1u128 << 80) - 1);
344-
base_n::encode(hash, base_n::CASE_INSENSITIVE)
343+
hash.as_u128().to_base_fixed_len(CASE_INSENSITIVE)
345344
}
346345

347346
pub fn compute_size_estimate(&mut self) {

compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
//!
55
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
66
//! see design document in the tracking issue #89653.
7-
use rustc_data_structures::base_n;
7+
use rustc_data_structures::base_n::ToBaseN;
88
use rustc_data_structures::fx::FxHashMap;
99
use rustc_hir as hir;
1010
use rustc_middle::bug;
@@ -736,7 +736,7 @@ fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String {
736736
/// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>).
737737
fn to_disambiguator(num: u64) -> String {
738738
if let Some(num) = num.checked_sub(1) {
739-
format!("s{}_", base_n::encode(num as u128, 62))
739+
format!("s{}_", num.to_base(62))
740740
} else {
741741
"s_".to_string()
742742
}
@@ -746,7 +746,7 @@ fn to_disambiguator(num: u64) -> String {
746746
/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>).
747747
fn to_seq_id(num: usize) -> String {
748748
if let Some(num) = num.checked_sub(1) {
749-
base_n::encode(num as u128, 36).to_uppercase()
749+
(num as u64).to_base(36).to_uppercase()
750750
} else {
751751
"".to_string()
752752
}

compiler/rustc_symbol_mangling/src/v0.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use rustc_data_structures::base_n;
1+
use rustc_data_structures::base_n::ToBaseN;
22
use rustc_data_structures::fx::FxHashMap;
33
use rustc_data_structures::intern::Interned;
44
use rustc_hir as hir;
@@ -831,7 +831,7 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
831831
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
832832
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
833833
if let Some(x) = x.checked_sub(1) {
834-
base_n::push_str(x as u128, 62, output);
834+
output.push_str(&x.to_base(62));
835835
}
836836
output.push('_');
837837
}

0 commit comments

Comments
 (0)