Skip to content

Commit a6b7274

Browse files
committed
Auto merge of #102596 - scottmcm:option-bool-calloc, r=Mark-Simulacrum
Do the `calloc` optimization for `Option<bool>`. Inspired by <https://old.reddit.com/r/rust/comments/xtiqj8/why_is_this_functional_version_faster_than_my_for/iqqy37b/>.
2 parents 0265a3e + 31cd0aa commit a6b7274

File tree

2 files changed

+40
-1
lines changed

2 files changed

+40
-1
lines changed

library/alloc/src/vec/is_zero.rs

+22
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,25 @@ unsafe impl<T: IsZero> IsZero for Saturating<T> {
160160
self.0.is_zero()
161161
}
162162
}
163+
164+
// Implements `IsZero` for each listed `Option`-of-`bool` type.
//
// `is_zero` reinterprets the value as a single `u8` and reports whether that
// byte is 0. The `transmute` only compiles while `$t` is exactly one byte
// wide, so a layout change that widens the type would become a build error
// rather than silently reading the wrong bytes.
macro_rules! impl_for_optional_bool {
165+
($($t:ty,)+) => {$(
166+
unsafe impl IsZero for $t {
167+
#[inline]
168+
fn is_zero(&self) -> bool {
169+
// SAFETY: This is *not* a stable layout guarantee, but
170+
// inside the standard library we're allowed to rely on the current rustc
171+
// behaviour that options of bools will be one byte with
172+
// no padding, so long as they're nested less than 254 deep.
173+
let raw: u8 = unsafe { core::mem::transmute(*self) };
174+
raw == 0
175+
}
176+
}
177+
)+};
178+
}
179+
// Instantiate the impl for one, two and three levels of `Option` around `bool`.
impl_for_optional_bool! {
180+
Option<bool>,
181+
Option<Option<bool>>,
182+
Option<Option<Option<bool>>>,
183+
// Could go further, but not worth the metadata overhead
184+
}

src/test/codegen/vec-calloc.rs

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// compile-flags: -O
1+
// compile-flags: -O -Z merge-functions=disabled
22
// only-x86_64
33
// ignore-debug
44
// min-llvm-version: 15.0
@@ -144,6 +144,23 @@ pub fn vec_non_zero_tuple(n: usize) -> Vec<(i16, u8, char)> {
144144
vec![(0, 0, 'A'); n]
145145
}
146146

147+
// Codegen test: filling a `Vec<Option<bool>>` with `Some(false)` is expected
// to lower to a call to `__rust_alloc_zeroed`, with no call to `from_elem`,
// `reserve` or the non-zeroing `__rust_alloc` before or after it within this
// function.
// CHECK-LABEL: @vec_option_bool
148+
#[no_mangle]
149+
pub fn vec_option_bool(n: usize) -> Vec<Option<bool>> {
150+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
151+
// CHECK-NOT: call {{.*}}reserve
152+
// CHECK-NOT: call {{.*}}__rust_alloc(
153+
154+
// CHECK: call {{.*}}__rust_alloc_zeroed(
155+
156+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
157+
// CHECK-NOT: call {{.*}}reserve
158+
// CHECK-NOT: call {{.*}}__rust_alloc(
159+
160+
// CHECK: ret void
161+
vec![Some(false); n]
162+
}
163+
147164
// Ensure that __rust_alloc_zeroed gets the right attributes for LLVM to optimize it away.
148165
// CHECK: declare noalias ptr @__rust_alloc_zeroed(i64, i64 allocalign) unnamed_addr [[RUST_ALLOC_ZEROED_ATTRS:#[0-9]+]]
149166

0 commit comments

Comments
 (0)