Skip to content

Commit 360c227

Browse files
chore: cargo fmt
1 parent d2a0874 commit 360c227

File tree

1 file changed

+38
-17
lines changed
  • samples/introduction/matmul/src

1 file changed

+38
-17
lines changed

samples/introduction/matmul/src/main.rs

Lines changed: 38 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,11 @@ use cust::stream::{Stream, StreamFlags};
88

99
static PTX: &str = include_str!(concat!(env!("OUT_DIR"), "/kernels.ptx"));
1010

11-
fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (usize, usize, usize)) -> Result<(), cust::error::CudaError> {
11+
fn matrix_multiply(
12+
block_size: usize,
13+
dims_a: (usize, usize, usize),
14+
dims_b: (usize, usize, usize),
15+
) -> Result<(), cust::error::CudaError> {
1216
let dims_c = (dims_b.0, dims_a.1, 1);
1317
let size_a = dims_a.0 * dims_a.1;
1418
let h_a = LockedBuffer::new(&1.0f32, size_a).expect("host array couldn't be initialized!");
@@ -19,19 +23,24 @@ fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (us
1923
let stream = Stream::new(StreamFlags::NON_BLOCKING, None).expect("Stream couldn't be init!");
2024

2125
let size_c = dims_b.0 * dims_a.1;
22-
let mut h_c =
23-
LockedBuffer::new(&0.0f32, size_c).expect("host array couldn't be initialized!");
26+
let mut h_c = LockedBuffer::new(&0.0f32, size_c).expect("host array couldn't be initialized!");
2427

2528
let start_event = Event::new(EventFlags::DEFAULT)?;
2629
let stop_event = Event::new(EventFlags::DEFAULT)?;
2730

28-
let d_a = DeviceBuffer::from_slice(h_a.as_slice()).expect("device array couldn't be initialized!");
29-
let d_b = DeviceBuffer::from_slice(h_b.as_slice()).expect("device array couldn't be initialized!");
30-
let d_c = DeviceBuffer::from_slice(h_c.as_slice()).expect("device array couldn't be initialized!");
31-
31+
let d_a =
32+
DeviceBuffer::from_slice(h_a.as_slice()).expect("device array couldn't be initialized!");
33+
let d_b =
34+
DeviceBuffer::from_slice(h_b.as_slice()).expect("device array couldn't be initialized!");
35+
let d_c =
36+
DeviceBuffer::from_slice(h_c.as_slice()).expect("device array couldn't be initialized!");
37+
3238
stream.synchronize().expect("Stream couldn't synchronize!");
3339
let threads = BlockSize::xy(block_size as u32, block_size as u32);
34-
let grid = GridSize::xy((dims_b.0 / (threads.x as usize)).try_into().unwrap(), (dims_a.1 / (threads.y as usize)).try_into().unwrap());
40+
let grid = GridSize::xy(
41+
(dims_b.0 / (threads.x as usize)).try_into().unwrap(),
42+
(dims_a.1 / (threads.y as usize)).try_into().unwrap(),
43+
);
3544

3645
println!("Computing result using CUDA Kernel...");
3746

@@ -57,7 +66,7 @@ fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (us
5766
.record(&stream)
5867
.expect("Failed to record start_event in the CUDA stream!");
5968

60-
const N_ITER : u32 = 300;
69+
const N_ITER: u32 = 300;
6170

6271
for _ in 0..N_ITER {
6372
unsafe {
@@ -75,22 +84,26 @@ fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (us
7584
.record(&stream)
7685
.expect("Failed to record stop_event in the CUDA stream!");
7786

78-
stop_event.synchronize().expect("Stream couldn't synchronize!");
87+
stop_event
88+
.synchronize()
89+
.expect("Stream couldn't synchronize!");
7990

8091
let gpu_time: u128 = stop_event
8192
.elapsed(&start_event)
8293
.expect("Failed to calculate duration of GPU operations!")
8394
.as_micros();
8495

8596
let avg_time = gpu_time as f32 / N_ITER as f32;
86-
println!("Average time spent executing by the GPU: {} microseconds", avg_time);
97+
println!(
98+
"Average time spent executing by the GPU: {} microseconds",
99+
avg_time
100+
);
87101
let flops_per_matrix_mul = 2.0 * (dims_a.0 as f32) * (dims_a.1 as f32) * (dims_b.0 as f32);
88102
let giga_flops = (flops_per_matrix_mul / (avg_time)) / 1000.0;
89103
println!("Performance = {} GFlop/s", giga_flops);
90104

91-
unsafe{
92-
d_c
93-
.async_copy_to(&mut h_c, &stream)
105+
unsafe {
106+
d_c.async_copy_to(&mut h_c, &stream)
94107
.expect("Could not copy from device to host!");
95108
}
96109
stream.synchronize().expect("Stream couldn't synchronize!");
@@ -108,14 +121,22 @@ fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (us
108121
let rel_err = abs_err / abs_val.max(dot_length * machine_epsilon);
109122

110123
if rel_err > 1e-6 {
111-
println!("Error at index {}: CPU = {}, GPU = {}, rel_err = {}", i, dims_a.0 as f32 * 0.01f32, h_c[i], rel_err);
124+
println!(
125+
"Error at index {}: CPU = {}, GPU = {}, rel_err = {}",
126+
i,
127+
dims_a.0 as f32 * 0.01f32,
128+
h_c[i],
129+
rel_err
130+
);
112131
correct = false;
113132
}
114133
}
115134

116135
if correct {
117136
println!("Result = PASS");
118-
println!("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.");
137+
println!(
138+
"NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled."
139+
);
119140
} else {
120141
println!("Result = FAIL");
121142
return Err(cust::error::CudaError::UnknownError);
@@ -130,7 +151,7 @@ fn main() -> Result<(), cust::error::CudaError> {
130151
let device = Device::get_device(0).expect("Couldn't find Cuda supported devices!");
131152
println!("Device Name: {}", device.name().unwrap());
132153

133-
let block_size: u32 = 32;
154+
let block_size: u32 = 32;
134155
let dims_a: (usize, usize, usize) = (block_size as usize, block_size as usize, 1);
135156
let dims_b: (usize, usize, usize) = (block_size as usize, block_size as usize, 1);
136157

0 commit comments

Comments
 (0)