Skip to content

Commit 1cbf555

Browse files
committed
merge from upstream
2 parents 455fbbe + 715f9ac commit 1cbf555

19 files changed

+1112
-4
lines changed

README.md

+34
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,40 @@ We can also be found on [Zulip][zulip-project-portable-simd].
1212

1313
If you are interested in support for a specific architecture, you may want [stdarch] instead.
1414

15+
## Hello World
16+
17+
Let's dip our toes into this world with a small SIMD "Hello, World!" example. First, make sure your compiler is up to date and using `nightly`. You can do that by running
18+
19+
```bash
20+
rustup update -- nightly
21+
```
22+
23+
or by running `rustup default nightly`, or else by invoking Cargo as `cargo +nightly {build,test,run}`. After updating, run
24+
```bash
25+
cargo new hellosimd
26+
```
27+
to create a new crate. Edit `hellosimd/Cargo.toml` to be
28+
```toml
29+
[package]
30+
name = "hellosimd"
31+
version = "0.1.0"
32+
edition = "2018"
33+
[dependencies]
34+
core_simd = { git = "https://github.com/rust-lang/stdsimd" }
35+
```
36+
37+
and finally write this in `src/main.rs`:
38+
```rust
39+
use core_simd::*;
40+
fn main() {
41+
let a = f32x4::splat(10.0);
42+
let b = f32x4::from_array([1.0, 2.0, 3.0, 4.0]);
43+
println!("{:?}", a + b);
44+
}
45+
```
46+
47+
Explanation: The first line imports all the bindings from the crate. Next, we construct our SIMD vectors with methods like `splat` or `from_array`. Finally, we can use operators such as `+` on them, and the appropriate SIMD instructions will be carried out. Running `cargo run` should print `[11.0, 12.0, 13.0, 14.0]`.
48+
1549
## Code Organization
1650

1751
Currently the crate is organized so that each element type has its own file, and the 64-bit, 128-bit, 256-bit, and 512-bit vectors using that type are defined in that file.

crates/core_simd/examples/nbody.rs

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
//! Benchmarks Game n-body simulation.
//!
//! Taken from the `packed_simd` crate.
//! Run this benchmark with `cargo test --example nbody`.
4+
use core_simd::*;
5+
6+
use std::f64::consts::PI;
7+
/// Mass of the sun in the simulation's units, defined as `4 * PI^2`.
///
/// Every other body's mass in `BODIES` is expressed as a fraction of this value.
const SOLAR_MASS: f64 = 4.0 * PI * PI;

/// Factor applied to each initial velocity component in `BODIES`.
const DAYS_PER_YEAR: f64 = 365.24;
9+
10+
#[derive(Debug, Clone, Copy)]
11+
pub struct Body {
12+
pub x: f64x4,
13+
pub v: f64x4,
14+
pub mass: f64,
15+
}
16+
17+
const N_BODIES: usize = 5;
18+
const BODIES: [Body; N_BODIES] = [
19+
// sun:
20+
Body {
21+
x: f64x4::from_array([0., 0., 0., 0.]),
22+
v: f64x4::from_array([0., 0., 0., 0.]),
23+
mass: SOLAR_MASS,
24+
},
25+
// jupiter:
26+
Body {
27+
x: f64x4::from_array([
28+
4.84143144246472090e+00,
29+
-1.16032004402742839e+00,
30+
-1.03622044471123109e-01,
31+
0.,
32+
]),
33+
v: f64x4::from_array([
34+
1.66007664274403694e-03 * DAYS_PER_YEAR,
35+
7.69901118419740425e-03 * DAYS_PER_YEAR,
36+
-6.90460016972063023e-05 * DAYS_PER_YEAR,
37+
0.,
38+
]),
39+
mass: 9.54791938424326609e-04 * SOLAR_MASS,
40+
},
41+
// saturn:
42+
Body {
43+
x: f64x4::from_array([
44+
8.34336671824457987e+00,
45+
4.12479856412430479e+00,
46+
-4.03523417114321381e-01,
47+
0.,
48+
]),
49+
v: f64x4::from_array([
50+
-2.76742510726862411e-03 * DAYS_PER_YEAR,
51+
4.99852801234917238e-03 * DAYS_PER_YEAR,
52+
2.30417297573763929e-05 * DAYS_PER_YEAR,
53+
0.,
54+
]),
55+
mass: 2.85885980666130812e-04 * SOLAR_MASS,
56+
},
57+
// uranus:
58+
Body {
59+
x: f64x4::from_array([
60+
1.28943695621391310e+01,
61+
-1.51111514016986312e+01,
62+
-2.23307578892655734e-01,
63+
0.,
64+
]),
65+
v: f64x4::from_array([
66+
2.96460137564761618e-03 * DAYS_PER_YEAR,
67+
2.37847173959480950e-03 * DAYS_PER_YEAR,
68+
-2.96589568540237556e-05 * DAYS_PER_YEAR,
69+
0.,
70+
]),
71+
mass: 4.36624404335156298e-05 * SOLAR_MASS,
72+
},
73+
// neptune:
74+
Body {
75+
x: f64x4::from_array([
76+
1.53796971148509165e+01,
77+
-2.59193146099879641e+01,
78+
1.79258772950371181e-01,
79+
0.,
80+
]),
81+
v: f64x4::from_array([
82+
2.68067772490389322e-03 * DAYS_PER_YEAR,
83+
1.62824170038242295e-03 * DAYS_PER_YEAR,
84+
-9.51592254519715870e-05 * DAYS_PER_YEAR,
85+
0.,
86+
]),
87+
mass: 5.15138902046611451e-05 * SOLAR_MASS,
88+
},
89+
];
90+
91+
pub fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
92+
let (sun, rest) = bodies.split_at_mut(1);
93+
let sun = &mut sun[0];
94+
for body in rest {
95+
let m_ratio = body.mass / SOLAR_MASS;
96+
sun.v -= body.v * m_ratio;
97+
}
98+
}
99+
100+
/// Computes the total energy of the system.
///
/// The result is the sum of each body's kinetic term `0.5 * m * |v|^2`
/// minus, for every unordered pair of bodies, `m_i * m_j / |x_i - x_j|`.
pub fn energy(bodies: &[Body; N_BODIES]) -> f64 {
    let mut total = 0.0;
    for (index, body) in bodies.iter().enumerate() {
        // Kinetic contribution of this body.
        total += body.mass * (body.v * body.v).horizontal_sum() * 0.5;

        // Potential contribution of each pair (this body, any later body),
        // so every pair is counted exactly once.
        for other in &bodies[index + 1..] {
            let separation = body.x - other.x;
            let distance = (separation * separation).horizontal_sum().sqrt();
            total -= body.mass * other.mass / distance;
        }
    }
    total
}
112+
113+
pub fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
114+
const N: usize = N_BODIES * (N_BODIES - 1) / 2;
115+
116+
// compute distance between bodies:
117+
let mut r = [f64x4::splat(0.); N];
118+
{
119+
let mut i = 0;
120+
for j in 0..N_BODIES {
121+
for k in j + 1..N_BODIES {
122+
r[i] = bodies[j].x - bodies[k].x;
123+
i += 1;
124+
}
125+
}
126+
}
127+
128+
let mut mag = [0.0; N];
129+
for i in (0..N).step_by(2) {
130+
let d2s = f64x2::from_array([
131+
(r[i] * r[i]).horizontal_sum(),
132+
(r[i + 1] * r[i + 1]).horizontal_sum(),
133+
]);
134+
let dmags = f64x2::splat(dt) / (d2s * d2s.sqrt());
135+
mag[i] = dmags[0];
136+
mag[i + 1] = dmags[1];
137+
}
138+
139+
let mut i = 0;
140+
for j in 0..N_BODIES {
141+
for k in j + 1..N_BODIES {
142+
let f = r[i] * mag[i];
143+
bodies[j].v -= f * bodies[k].mass;
144+
bodies[k].v += f * bodies[j].mass;
145+
i += 1
146+
}
147+
}
148+
for body in bodies {
149+
body.x += dt * body.v
150+
}
151+
}
152+
153+
pub fn run(n: usize) -> (f64, f64) {
154+
let mut bodies = BODIES;
155+
offset_momentum(&mut bodies);
156+
let energy_before = energy(&bodies);
157+
for _ in 0..n {
158+
advance(&mut bodies, 0.01);
159+
}
160+
let energy_after = energy(&bodies);
161+
162+
(energy_before, energy_after)
163+
}
164+
165+
#[cfg(test)]
mod tests {
    /// Returns `true` when `a` and `b` differ by less than `0.00001`.
    ///
    /// A fixed absolute tolerance is good enough for demonstration purposes;
    /// this is not meant to be a strict floating-point comparison.
    fn approx_eq_f64(a: f64, b: f64) -> bool {
        let difference = (a - b).abs();
        difference < 0.00001
    }

    /// Checks the energies reported by `run(1000)` against reference values.
    #[test]
    fn test() {
        const EXPECTED_BEFORE: f64 = -0.169075164;
        const EXPECTED_AFTER: f64 = -0.169087605;

        let (energy_before, energy_after) = super::run(1000);

        assert!(approx_eq_f64(energy_before, EXPECTED_BEFORE));
        assert!(approx_eq_f64(energy_after, EXPECTED_AFTER));
    }
}
179+
180+
/// Entry point of the example.
fn main() {
    // Intentionally empty: this function exists only so the example passes
    // CI. The simulation itself is exercised by the test in `tests`.
}

0 commit comments

Comments
 (0)