Skip to content

Commit a2acb27

Browse files
committed
Implement llvm.x86.sse3.* intrinsics
1 parent 1a82975 commit a2acb27

File tree

3 files changed

+239
-0
lines changed

3 files changed

+239
-0
lines changed

src/shims/x86/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use shims::foreign_items::EmulateByNameResult;
99

1010
mod sse;
1111
mod sse2;
12+
mod sse3;
1213

1314
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
1415
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
@@ -88,6 +89,11 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
8889
this, link_name, abi, args, dest,
8990
);
9091
}
92+
name if name.starts_with("sse3.") => {
93+
return sse3::EvalContextExt::emulate_x86_sse3_intrinsic(
94+
this, link_name, abi, args, dest,
95+
);
96+
}
9197
_ => return Ok(EmulateByNameResult::NotSupported),
9298
}
9399
Ok(EmulateByNameResult::NeedsJumping)

src/shims/x86/sse3.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
use rustc_middle::mir;
2+
use rustc_span::Symbol;
3+
use rustc_target::abi::Align;
4+
use rustc_target::spec::abi::Abi;
5+
6+
use crate::*;
7+
use shims::foreign_items::EmulateByNameResult;
8+
9+
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
10+
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
11+
crate::MiriInterpCxExt<'mir, 'tcx>
12+
{
13+
fn emulate_x86_sse3_intrinsic(
14+
&mut self,
15+
link_name: Symbol,
16+
abi: Abi,
17+
args: &[OpTy<'tcx, Provenance>],
18+
dest: &PlaceTy<'tcx, Provenance>,
19+
) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
20+
let this = self.eval_context_mut();
21+
// Prefix should have already been checked.
22+
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap();
23+
24+
match unprefixed_name {
25+
// Used to implement the _mm_addsub_ps and _mm_addsub_pd functions.
26+
// Alternatingly add and subtract floating point (f32 or f64) from
27+
// `left` and `right`
28+
"addsub.ps" | "addsub.pd" => {
29+
let [left, right] =
30+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
31+
32+
let (left, left_len) = this.operand_to_simd(left)?;
33+
let (right, right_len) = this.operand_to_simd(right)?;
34+
let (dest, dest_len) = this.place_to_simd(dest)?;
35+
36+
assert_eq!(dest_len, left_len);
37+
assert_eq!(dest_len, right_len);
38+
39+
for i in 0..dest_len {
40+
let left = this.read_immediate(&this.project_index(&left, i)?)?;
41+
let right = this.read_immediate(&this.project_index(&right, i)?)?;
42+
let dest = this.project_index(&dest, i)?;
43+
44+
// Even elements are subtracted and odd elements are added.
45+
let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add };
46+
let res = this.wrapping_binary_op(op, &left, &right)?;
47+
48+
this.write_immediate(*res, &dest)?;
49+
}
50+
}
51+
// Used to implement the _mm_h{add,sub}_p{s,d} functions.
52+
// Horizontally add/subtract adjacent floating point values
53+
// in `left` and `right`.
54+
"hadd.ps" | "hadd.pd" | "hsub.ps" | "hsub.pd" => {
55+
let [left, right] =
56+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
57+
58+
let (left, left_len) = this.operand_to_simd(left)?;
59+
let (right, right_len) = this.operand_to_simd(right)?;
60+
let (dest, dest_len) = this.place_to_simd(dest)?;
61+
62+
assert_eq!(dest_len, left_len);
63+
assert_eq!(dest_len, right_len);
64+
assert_eq!(dest_len % 2, 0);
65+
66+
let op = match unprefixed_name {
67+
"hadd.ps" | "hadd.pd" => mir::BinOp::Add,
68+
"hsub.ps" | "hsub.pd" => mir::BinOp::Sub,
69+
_ => unreachable!(),
70+
};
71+
72+
let middle = dest_len / 2;
73+
for i in 0..dest_len {
74+
let (lhs, rhs) = if i < middle {
75+
let base_i = i.checked_mul(2).unwrap();
76+
(
77+
this.read_immediate(&this.project_index(&left, base_i)?)?,
78+
this.read_immediate(
79+
&this.project_index(&left, base_i.checked_add(1).unwrap())?,
80+
)?,
81+
)
82+
} else {
83+
let base_i = i.checked_sub(middle).unwrap().checked_mul(2).unwrap();
84+
(
85+
this.read_immediate(&this.project_index(&right, base_i)?)?,
86+
this.read_immediate(
87+
&this.project_index(&right, base_i.checked_add(1).unwrap())?,
88+
)?,
89+
)
90+
};
91+
let (res, _overflow) = this.overflowing_binary_op(op, &lhs, &rhs)?;
92+
93+
this.write_immediate(*res, &this.project_index(&dest, i)?)?;
94+
}
95+
}
96+
// Used to implement the _mm_lddqu_si128 function.
97+
// Reads a 128-bit vector from an unaligned pointer. This intrinsic
98+
// is expected to perform better than a regular unaligned read when
99+
// the data crosses a cache line, but for Miri this is just a regular
100+
// unaligned read.
101+
"ldu.dq" => {
102+
let [src_ptr] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
103+
let src_ptr = this.read_pointer(src_ptr)?;
104+
let dest = dest.force_mplace(this)?;
105+
106+
this.mem_copy(
107+
src_ptr,
108+
Align::ONE,
109+
dest.ptr(),
110+
Align::ONE,
111+
dest.layout.size,
112+
/*nonoverlapping*/ true,
113+
)?;
114+
}
115+
_ => return Ok(EmulateByNameResult::NotSupported),
116+
}
117+
Ok(EmulateByNameResult::NeedsJumping)
118+
}
119+
}

tests/pass/intrinsics-x86-sse3.rs

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Ignore everything except x86 and x86_64
2+
// Any additional targets that are added to CI should be ignored here
3+
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
4+
//@ignore-target-aarch64
5+
//@ignore-target-arm
6+
//@ignore-target-avr
7+
//@ignore-target-s390x
8+
//@ignore-target-thumbv7em
9+
//@ignore-target-wasm32
10+
//@compile-flags: -C target-feature=+sse3
11+
12+
use core::mem::transmute;
13+
#[cfg(target_arch = "x86")]
14+
use std::arch::x86::*;
15+
#[cfg(target_arch = "x86_64")]
16+
use std::arch::x86_64::*;
17+
18+
fn main() {
19+
assert!(is_x86_feature_detected!("sse3"));
20+
21+
unsafe {
22+
test_sse3();
23+
}
24+
}
25+
26+
#[target_feature(enable = "sse3")]
27+
unsafe fn test_sse3() {
28+
// Mostly copied from library/stdarch/crates/core_arch/src/x86/sse3.rs
29+
30+
#[target_feature(enable = "sse3")]
31+
unsafe fn test_mm_addsub_ps() {
32+
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
33+
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
34+
let r = _mm_addsub_ps(a, b);
35+
assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0));
36+
}
37+
test_mm_addsub_ps();
38+
39+
#[target_feature(enable = "sse3")]
40+
unsafe fn test_mm_addsub_pd() {
41+
let a = _mm_setr_pd(-1.0, 5.0);
42+
let b = _mm_setr_pd(-100.0, 20.0);
43+
let r = _mm_addsub_pd(a, b);
44+
assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0));
45+
}
46+
test_mm_addsub_pd();
47+
48+
#[target_feature(enable = "sse3")]
49+
unsafe fn test_mm_hadd_ps() {
50+
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
51+
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
52+
let r = _mm_hadd_ps(a, b);
53+
assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0));
54+
}
55+
test_mm_hadd_ps();
56+
57+
#[target_feature(enable = "sse3")]
58+
unsafe fn test_mm_hadd_pd() {
59+
let a = _mm_setr_pd(-1.0, 5.0);
60+
let b = _mm_setr_pd(-100.0, 20.0);
61+
let r = _mm_hadd_pd(a, b);
62+
assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0));
63+
}
64+
test_mm_hadd_pd();
65+
66+
#[target_feature(enable = "sse3")]
67+
unsafe fn test_mm_hsub_ps() {
68+
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
69+
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
70+
let r = _mm_hsub_ps(a, b);
71+
assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0));
72+
}
73+
test_mm_hsub_ps();
74+
75+
#[target_feature(enable = "sse3")]
76+
unsafe fn test_mm_hsub_pd() {
77+
let a = _mm_setr_pd(-1.0, 5.0);
78+
let b = _mm_setr_pd(-100.0, 20.0);
79+
let r = _mm_hsub_pd(a, b);
80+
assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0));
81+
}
82+
test_mm_hsub_pd();
83+
84+
#[target_feature(enable = "sse3")]
85+
unsafe fn test_mm_lddqu_si128() {
86+
let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
87+
let r = _mm_lddqu_si128(&a);
88+
assert_eq_m128i(a, r);
89+
}
90+
test_mm_lddqu_si128();
91+
}
92+
93+
/// Panics, printing both vectors, unless `a` and `b` compare equal in all
/// four lanes.
#[track_caller]
#[target_feature(enable = "sse")]
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
    // One mask bit per lane; all four bits set means every lane compared equal.
    let equal_lanes = _mm_movemask_ps(_mm_cmpeq_ps(a, b));
    if equal_lanes == 0b1111 {
        return;
    }
    panic!("{:?} != {:?}", a, b);
}
101+
102+
/// Panics, printing both vectors, unless `a` and `b` compare equal in both
/// lanes.
#[track_caller]
#[target_feature(enable = "sse2")]
unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
    // One mask bit per lane; both bits set means every lane compared equal.
    let lane_cmp = _mm_cmpeq_pd(a, b);
    let equal_lanes = _mm_movemask_pd(lane_cmp);
    if equal_lanes == 0b11 {
        return;
    }
    panic!("{:?} != {:?}", a, b);
}
109+
110+
/// Asserts that `a` and `b` hold the same 128 bits, comparing them as two
/// `u64` halves.
#[track_caller]
#[target_feature(enable = "sse2")]
pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
    let a_halves: [u64; 2] = transmute(a);
    let b_halves: [u64; 2] = transmute(b);
    assert_eq!(a_halves, b_halves)
}

0 commit comments

Comments
 (0)