Skip to content

Commit 78bd1d8

Browse files
authored
Merge pull request #809 from rust-ndarray/zip-strided-for-c-and-f
Zip: Handle preferred memory layout of inhomogenous inputs better
2 parents 6953d73 + 47b3654 commit 78bd1d8

File tree

6 files changed

+324
-45
lines changed

6 files changed

+324
-45
lines changed

benches/iter.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -381,12 +381,12 @@ pub fn zip_mut_with(data: &Array3<f32>, out: &mut Array3<f32>) {
381381
fn zip_mut_with_cc(b: &mut Bencher) {
382382
let data: Array3<f32> = Array3::zeros((ISZ, ISZ, ISZ));
383383
let mut out = Array3::zeros(data.dim());
384-
b.iter(|| black_box(zip_mut_with(&data, &mut out)));
384+
b.iter(|| zip_mut_with(&data, &mut out));
385385
}
386386

387387
#[bench]
388388
fn zip_mut_with_ff(b: &mut Bencher) {
389389
let data: Array3<f32> = Array3::zeros((ISZ, ISZ, ISZ).f());
390390
let mut out = Array3::zeros(data.dim().f());
391-
b.iter(|| black_box(zip_mut_with(&data, &mut out)));
391+
b.iter(|| zip_mut_with(&data, &mut out));
392392
}

benches/zip.rs

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#![feature(test)]
2+
extern crate test;
3+
use test::{black_box, Bencher};
4+
use ndarray::{Array3, ShapeBuilder, Zip};
5+
use ndarray::s;
6+
use ndarray::IntoNdProducer;
7+
8+
pub fn zip_copy<'a, A, P, Q>(data: P, out: Q)
9+
where P: IntoNdProducer<Item = &'a A>,
10+
Q: IntoNdProducer<Item = &'a mut A, Dim = P::Dim>,
11+
A: Copy + 'a
12+
{
13+
Zip::from(data).and(out).apply(|&i, o| {
14+
*o = i;
15+
});
16+
}
17+
18+
pub fn zip_copy_split<'a, A, P, Q>(data: P, out: Q)
19+
where P: IntoNdProducer<Item = &'a A>,
20+
Q: IntoNdProducer<Item = &'a mut A, Dim = P::Dim>,
21+
A: Copy + 'a
22+
{
23+
let z = Zip::from(data).and(out);
24+
let (z1, z2) = z.split();
25+
let (z11, z12) = z1.split();
26+
let (z21, z22) = z2.split();
27+
let f = |&i: &A, o: &mut A| *o = i;
28+
z11.apply(f);
29+
z12.apply(f);
30+
z21.apply(f);
31+
z22.apply(f);
32+
}
33+
34+
pub fn zip_indexed(data: &Array3<f32>, out: &mut Array3<f32>) {
35+
Zip::indexed(data).and(out).apply(|idx, &i, o| {
36+
let _ = black_box(idx);
37+
*o = i;
38+
});
39+
}
40+
41+
// array size in benchmarks
42+
const SZ3: (usize, usize, usize) = (100, 110, 100);
43+
44+
#[bench]
45+
fn zip_cc(b: &mut Bencher) {
46+
let data: Array3<f32> = Array3::zeros(SZ3);
47+
let mut out = Array3::zeros(data.dim());
48+
b.iter(|| zip_copy(&data, &mut out));
49+
}
50+
51+
#[bench]
52+
fn zip_cf(b: &mut Bencher) {
53+
let data: Array3<f32> = Array3::zeros(SZ3);
54+
let mut out = Array3::zeros(data.dim().f());
55+
b.iter(|| zip_copy(&data, &mut out));
56+
}
57+
58+
#[bench]
59+
fn zip_fc(b: &mut Bencher) {
60+
let data: Array3<f32> = Array3::zeros(SZ3.f());
61+
let mut out = Array3::zeros(data.dim());
62+
b.iter(|| zip_copy(&data, &mut out));
63+
}
64+
65+
#[bench]
66+
fn zip_ff(b: &mut Bencher) {
67+
let data: Array3<f32> = Array3::zeros(SZ3.f());
68+
let mut out = Array3::zeros(data.dim().f());
69+
b.iter(|| zip_copy(&data, &mut out));
70+
}
71+
72+
#[bench]
73+
fn zip_indexed_cc(b: &mut Bencher) {
74+
let data: Array3<f32> = Array3::zeros(SZ3);
75+
let mut out = Array3::zeros(data.dim());
76+
b.iter(|| zip_indexed(&data, &mut out));
77+
}
78+
79+
#[bench]
80+
fn zip_indexed_ff(b: &mut Bencher) {
81+
let data: Array3<f32> = Array3::zeros(SZ3.f());
82+
let mut out = Array3::zeros(data.dim().f());
83+
b.iter(|| zip_indexed(&data, &mut out));
84+
}
85+
86+
#[bench]
87+
fn slice_zip_cc(b: &mut Bencher) {
88+
let data: Array3<f32> = Array3::zeros(SZ3);
89+
let mut out = Array3::zeros(data.dim());
90+
let data = data.slice(s![1.., 1.., 1..]);
91+
let mut out = out.slice_mut(s![1.., 1.., 1..]);
92+
b.iter(|| zip_copy(&data, &mut out));
93+
}
94+
95+
#[bench]
96+
fn slice_zip_ff(b: &mut Bencher) {
97+
let data: Array3<f32> = Array3::zeros(SZ3.f());
98+
let mut out = Array3::zeros(data.dim().f());
99+
let data = data.slice(s![1.., 1.., 1..]);
100+
let mut out = out.slice_mut(s![1.., 1.., 1..]);
101+
b.iter(|| zip_copy(&data, &mut out));
102+
}
103+
104+
#[bench]
105+
fn slice_split_zip_cc(b: &mut Bencher) {
106+
let data: Array3<f32> = Array3::zeros(SZ3);
107+
let mut out = Array3::zeros(data.dim());
108+
let data = data.slice(s![1.., 1.., 1..]);
109+
let mut out = out.slice_mut(s![1.., 1.., 1..]);
110+
b.iter(|| zip_copy_split(&data, &mut out));
111+
}
112+
113+
#[bench]
114+
fn slice_split_zip_ff(b: &mut Bencher) {
115+
let data: Array3<f32> = Array3::zeros(SZ3.f());
116+
let mut out = Array3::zeros(data.dim().f());
117+
let data = data.slice(s![1.., 1.., 1..]);
118+
let mut out = out.slice_mut(s![1.., 1.., 1..]);
119+
b.iter(|| zip_copy_split(&data, &mut out));
120+
}

src/layout/layoutfmt.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
use super::Layout;
1010

11-
const LAYOUT_NAMES: &[&str] = &["C", "F"];
11+
const LAYOUT_NAMES: &[&str] = &["C", "F", "c", "f"];
1212

1313
use std::fmt;
1414

src/layout/mod.rs

+113-22
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,145 @@
11
mod layoutfmt;
22

3-
// public struct but users don't interact with it
3+
// Layout is a bitset used for internal layout description of
4+
// arrays, producers and sets of producers.
5+
// The type is public but users don't interact with it.
46
#[doc(hidden)]
57
/// Memory layout description
68
#[derive(Copy, Clone)]
79
pub struct Layout(u32);
810

911
impl Layout {
1012
#[inline(always)]
11-
pub(crate) fn new(x: u32) -> Self {
12-
Layout(x)
13+
pub(crate) fn is(self, flag: u32) -> bool {
14+
self.0 & flag != 0
1315
}
1416

17+
/// Return layout common to both inputs
1518
#[inline(always)]
16-
pub(crate) fn is(self, flag: u32) -> bool {
17-
self.0 & flag != 0
19+
pub(crate) fn intersect(self, other: Layout) -> Layout {
20+
Layout(self.0 & other.0)
1821
}
22+
23+
/// Return a layout that simultaneously "is" what both of the inputs are
1924
#[inline(always)]
20-
pub(crate) fn and(self, flag: Layout) -> Layout {
21-
Layout(self.0 & flag.0)
25+
pub(crate) fn also(self, other: Layout) -> Layout {
26+
Layout(self.0 | other.0)
2227
}
2328

2429
#[inline(always)]
25-
pub(crate) fn flag(self) -> u32 {
26-
self.0
30+
pub(crate) fn one_dimensional() -> Layout {
31+
Layout::c().also(Layout::f())
2732
}
28-
}
2933

30-
impl Layout {
31-
#[doc(hidden)]
3234
#[inline(always)]
33-
pub fn one_dimensional() -> Layout {
34-
Layout(CORDER | FORDER)
35+
pub(crate) fn c() -> Layout {
36+
Layout(CORDER | CPREFER)
3537
}
36-
#[doc(hidden)]
38+
3739
#[inline(always)]
38-
pub fn c() -> Layout {
39-
Layout(CORDER)
40+
pub(crate) fn f() -> Layout {
41+
Layout(FORDER | FPREFER)
4042
}
41-
#[doc(hidden)]
43+
4244
#[inline(always)]
43-
pub fn f() -> Layout {
44-
Layout(FORDER)
45+
pub(crate) fn cpref() -> Layout {
46+
Layout(CPREFER)
4547
}
48+
49+
#[inline(always)]
50+
pub(crate) fn fpref() -> Layout {
51+
Layout(FPREFER)
52+
}
53+
4654
#[inline(always)]
47-
#[doc(hidden)]
48-
pub fn none() -> Layout {
55+
pub(crate) fn none() -> Layout {
4956
Layout(0)
5057
}
58+
59+
/// A simple "score" method: positive when C-order is preferred, negative when F-order is preferred.
60+
/// Subject to change when we can describe other layouts
61+
pub(crate) fn tendency(self) -> i32 {
62+
(self.is(CORDER) as i32 - self.is(FORDER) as i32) +
63+
(self.is(CPREFER) as i32 - self.is(FPREFER) as i32)
64+
65+
}
5166
}
5267

5368
pub const CORDER: u32 = 0b01;
5469
pub const FORDER: u32 = 0b10;
70+
pub const CPREFER: u32 = 0b0100;
71+
pub const FPREFER: u32 = 0b1000;
72+
73+
74+
#[cfg(test)]
75+
mod tests {
76+
use super::*;
77+
use crate::imp_prelude::*;
78+
use crate::NdProducer;
79+
80+
type M = Array2<f32>;
81+
82+
#[test]
83+
fn contig_layouts() {
84+
let a = M::zeros((5, 5));
85+
let b = M::zeros((5, 5).f());
86+
let ac = a.view().layout();
87+
let af = b.view().layout();
88+
assert!(ac.is(CORDER) && ac.is(CPREFER));
89+
assert!(!ac.is(FORDER) && !ac.is(FPREFER));
90+
assert!(!af.is(CORDER) && !af.is(CPREFER));
91+
assert!(af.is(FORDER) && af.is(FPREFER));
92+
}
93+
94+
#[test]
95+
fn stride_layouts() {
96+
let a = M::zeros((5, 5));
97+
98+
{
99+
let v1 = a.slice(s![1.., ..]).layout();
100+
let v2 = a.slice(s![.., 1..]).layout();
101+
102+
assert!(v1.is(CORDER) && v1.is(CPREFER));
103+
assert!(!v1.is(FORDER) && !v1.is(FPREFER));
104+
assert!(!v2.is(CORDER) && v2.is(CPREFER));
105+
assert!(!v2.is(FORDER) && !v2.is(FPREFER));
106+
}
107+
108+
let b = M::zeros((5, 5).f());
109+
110+
{
111+
let v1 = b.slice(s![1.., ..]).layout();
112+
let v2 = b.slice(s![.., 1..]).layout();
113+
114+
assert!(!v1.is(CORDER) && !v1.is(CPREFER));
115+
assert!(!v1.is(FORDER) && v1.is(FPREFER));
116+
assert!(!v2.is(CORDER) && !v2.is(CPREFER));
117+
assert!(v2.is(FORDER) && v2.is(FPREFER));
118+
}
119+
}
120+
121+
#[test]
122+
fn skip_layouts() {
123+
let a = M::zeros((5, 5));
124+
{
125+
let v1 = a.slice(s![..;2, ..]).layout();
126+
let v2 = a.slice(s![.., ..;2]).layout();
127+
128+
assert!(!v1.is(CORDER) && v1.is(CPREFER));
129+
assert!(!v1.is(FORDER) && !v1.is(FPREFER));
130+
assert!(!v2.is(CORDER) && !v2.is(CPREFER));
131+
assert!(!v2.is(FORDER) && !v2.is(FPREFER));
132+
}
133+
134+
let b = M::zeros((5, 5).f());
135+
{
136+
let v1 = b.slice(s![..;2, ..]).layout();
137+
let v2 = b.slice(s![.., ..;2]).layout();
138+
139+
assert!(!v1.is(CORDER) && !v1.is(CPREFER));
140+
assert!(!v1.is(FORDER) && !v1.is(FPREFER));
141+
assert!(!v2.is(CORDER) && !v2.is(CPREFER));
142+
assert!(!v2.is(FORDER) && v2.is(FPREFER));
143+
}
144+
}
145+
}

0 commit comments

Comments
 (0)