Skip to content

Commit 67a8bdd

Browse files
committed
use the stfle instruction rather than the auxvec
the `vector_packed_decimal` facility does not have a bit in the auxvec from what I can tell, also the code is just much shorter, so a win all around
1 parent 7954c67 commit 67a8bdd

File tree

1 file changed

+43
-107
lines changed
  • crates/std_detect/src/detect/os/linux

1 file changed

+43
-107
lines changed

crates/std_detect/src/detect/os/linux/s390x.rs

+43-107
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,37 @@
11
//! Run-time feature detection for s390x on Linux.
22
3-
use super::auxvec;
4-
use crate::detect::{Feature, bit, cache};
3+
use crate::detect::{Feature, cache};
54

65
/// Read the features from the extended facility list, as stored by the `stfle` instruction
76
pub(crate) fn detect_features() -> cache::Initializer {
8-
if let Ok(auxv) = auxvec::auxv() {
9-
let hwcap: AtHwcap = auxv.into();
10-
return hwcap.cache();
11-
}
12-
13-
cache::Initializer::default()
14-
}
15-
16-
/// These values are part of the platform-specific [asm/elf.h][kernel], and are a selection of the
17-
/// fields found in the [Facility Indications].
18-
///
19-
/// [Facility Indications]: https://www.ibm.com/support/pages/sites/default/files/2021-05/SA22-7871-10.pdf#page=63
20-
/// [kernel]: https://github.com/torvalds/linux/blob/b62cef9a5c673f1b8083159f5dc03c1c5daced2f/arch/s390/include/asm/elf.h#L129
21-
#[derive(Debug, Default, PartialEq)]
22-
struct AtHwcap {
23-
esan3: bool,
24-
zarch: bool,
25-
stfle: bool,
26-
msa: bool,
27-
ldisp: bool,
28-
eimm: bool,
29-
dfp: bool,
30-
hpage: bool,
31-
etf3eh: bool,
32-
high_gprs: bool,
33-
te: bool,
34-
vxrs: bool,
35-
vxrs_bcd: bool,
36-
vxrs_ext: bool,
37-
gs: bool,
38-
vxrs_ext2: bool,
39-
vxrs_pde: bool,
40-
sort: bool,
41-
dflt: bool,
42-
vxrs_pde2: bool,
43-
nnpa: bool,
44-
pci_mio: bool,
45-
sie: bool,
7+
ExtendedFacilityList::new().cache()
468
}
479

48-
impl From<auxvec::AuxVec> for AtHwcap {
49-
/// Reads AtHwcap from the auxiliary vector.
50-
fn from(auxv: auxvec::AuxVec) -> Self {
51-
AtHwcap {
52-
esan3: bit::test(auxv.hwcap, 0),
53-
zarch: bit::test(auxv.hwcap, 1),
54-
stfle: bit::test(auxv.hwcap, 2),
55-
msa: bit::test(auxv.hwcap, 3),
56-
ldisp: bit::test(auxv.hwcap, 4),
57-
eimm: bit::test(auxv.hwcap, 5),
58-
dfp: bit::test(auxv.hwcap, 6),
59-
hpage: bit::test(auxv.hwcap, 7),
60-
etf3eh: bit::test(auxv.hwcap, 8),
61-
high_gprs: bit::test(auxv.hwcap, 9),
62-
te: bit::test(auxv.hwcap, 10),
63-
vxrs: bit::test(auxv.hwcap, 11),
64-
vxrs_bcd: bit::test(auxv.hwcap, 12),
65-
vxrs_ext: bit::test(auxv.hwcap, 13),
66-
gs: bit::test(auxv.hwcap, 14),
67-
vxrs_ext2: bit::test(auxv.hwcap, 15),
68-
vxrs_pde: bit::test(auxv.hwcap, 16),
69-
sort: bit::test(auxv.hwcap, 17),
70-
dflt: bit::test(auxv.hwcap, 18),
71-
vxrs_pde2: bit::test(auxv.hwcap, 19),
72-
nnpa: bit::test(auxv.hwcap, 20),
73-
pci_mio: bit::test(auxv.hwcap, 21),
74-
sie: bit::test(auxv.hwcap, 22),
10+
struct ExtendedFacilityList([u64; 4]);
11+
12+
impl ExtendedFacilityList {
13+
fn new() -> Self {
14+
let mut result: [u64; 4] = [0; 4];
15+
// SAFETY: the s390x-unknown-linux-{gnu, musl} targets assume a minimum architecture level of z10,
16+
// which guarantees support for the the stfle instruction.
17+
unsafe {
18+
core::arch::asm!(
19+
"lgr %r0, {0}",
20+
// equivalently ".insn s, 0xb2b00000, 0({1})",
21+
"stfle 0({1})",
22+
in(reg) result.len() as u64 - 1,
23+
in(reg_addr) result.as_mut_ptr() ,
24+
options(nostack, preserves_flags )
25+
);
7526
}
27+
Self(result)
28+
}
29+
30+
const fn get(&self, n: usize) -> bool {
31+
// of course they number bits from the left...
32+
self.0[n / 64] & (1 << (63 - (n % 64))) != 0
7633
}
77-
}
7834

79-
impl AtHwcap {
8035
/// Initializes the cache from the feature bits.
8136
fn cache(self) -> cache::Initializer {
8237
let mut value = cache::Initializer::default();
@@ -87,45 +42,26 @@ impl AtHwcap {
8742
}
8843
};
8944

90-
// vector and related
91-
92-
// bit 129 of the extended facility list
93-
enable_feature(Feature::vector, self.vxrs);
94-
95-
// bit 135 of the extended facility list
96-
enable_feature(Feature::vector_enhancements_1, self.vxrs_ext);
97-
98-
// bit 148 of the extended facility list
99-
enable_feature(Feature::vector_enhancements_2, self.vxrs_ext2);
45+
// facility indications are taken from the IBM docs
46+
// https://www.ibm.com/support/pages/sites/default/files/2021-05/SA22-7871-10.pdf#page=63
10047

101-
// bit 134 of the extended facility list
102-
enable_feature(Feature::vector_packed_decimal, self.dfp); // TODO I think this is wrong
103-
104-
// bit 152 of the extended facility list
105-
enable_feature(Feature::vector_packed_decimal_enhancement, self.vxrs_pde);
106-
107-
// bit 192 of the extended facility list
108-
enable_feature(Feature::vector_packed_decimal_enhancement, self.vxrs_pde2);
109-
110-
// bit 165 of the extended facility list
111-
enable_feature(Feature::nnp_assist, self.nnpa);
112-
113-
// others
114-
115-
// bit 45 of the extended facility list
116-
enable_feature(Feature::high_word, self.high_gprs);
117-
118-
// bit 73 of the extended facility list
119-
enable_feature(Feature::transactional_execution, self.te);
120-
121-
// bit 133 of the extended facility list
122-
enable_feature(Feature::guarded_storage, self.gs);
123-
124-
// bit 150 of the extended facility list
125-
enable_feature(Feature::enhanced_sort, self.sort);
126-
127-
// bit 151 of the extended facility list
128-
enable_feature(Feature::deflate_conversion, self.dflt);
48+
// vector and related
49+
enable_feature(Feature::vector, self.get(129));
50+
enable_feature(Feature::vector_enhancements_1, self.get(135));
51+
enable_feature(Feature::vector_enhancements_2, self.get(148));
52+
53+
enable_feature(Feature::vector_packed_decimal, self.get(134));
54+
enable_feature(Feature::vector_packed_decimal_enhancement, self.get(152));
55+
enable_feature(Feature::vector_packed_decimal_enhancement_2, self.get(192));
56+
57+
enable_feature(Feature::nnp_assist, self.get(165));
58+
59+
// other
60+
enable_feature(Feature::high_word, self.get(45));
61+
enable_feature(Feature::transactional_execution, self.get(73));
62+
enable_feature(Feature::guarded_storage, self.get(133));
63+
enable_feature(Feature::enhanced_sort, self.get(150));
64+
enable_feature(Feature::deflate_conversion, self.get(151));
12965
}
13066
value
13167
}

0 commit comments

Comments
 (0)