Skip to content

Commit dc9592b

Browse files
fix: type corrections and proper copying of result data from device to host
1 parent 80cf8ff commit dc9592b

File tree

1 file changed

+10
-4
lines changed
  • samples/introduction/matmul/src

1 file changed

+10
-4
lines changed

samples/introduction/matmul/src/main.rs

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,8 +47,8 @@ fn matrix_multiply(block_size: usize, dimsA: (usize, usize, usize), dimsB: (usiz
4747
d_c.as_device_ptr(),
4848
d_a.as_device_ptr(),
4949
d_b.as_device_ptr(),
50-
dimsA.0 as u32,
51-
dimsB.0 as u32
50+
dimsA.0 as usize,
51+
dimsB.0 as usize
5252
))?;
5353
}
5454

@@ -67,8 +67,8 @@ fn matrix_multiply(block_size: usize, dimsA: (usize, usize, usize), dimsB: (usiz
6767
d_c.as_device_ptr(),
6868
d_a.as_device_ptr(),
6969
d_b.as_device_ptr(),
70-
dimsA.0 as u32,
71-
dimsB.0 as u32,
70+
dimsA.0 as usize,
71+
dimsB.0 as usize,
7272
))?;
7373
}
7474
}
@@ -90,6 +90,12 @@ fn matrix_multiply(block_size: usize, dimsA: (usize, usize, usize), dimsB: (usiz
9090
let gigaFlops = (flopsPerMatrixMul / (avg_time)) / 1000.0;
9191
println!("Performance = {} GFlop/s", gigaFlops);
9292

93+
unsafe{
94+
d_c
95+
.async_copy_to(&mut h_c, &stream)
96+
.expect("Could not copy from device to host!");
97+
}
98+
9399
// checking computed result
94100
// test relative error by the formula
95101
// |<x, y>_cpu - <x, y>_gpu| / |<x, y>_cpu|

0 commit comments

Comments
 (0)