@@ -8,7 +8,11 @@ use cust::stream::{Stream, StreamFlags};
88
99static PTX : & str = include_str ! ( concat!( env!( "OUT_DIR" ) , "/kernels.ptx" ) ) ;
1010
11- fn matrix_multiply ( block_size : usize , dims_a : ( usize , usize , usize ) , dims_b : ( usize , usize , usize ) ) -> Result < ( ) , cust:: error:: CudaError > {
11+ fn matrix_multiply (
12+ block_size : usize ,
13+ dims_a : ( usize , usize , usize ) ,
14+ dims_b : ( usize , usize , usize ) ,
15+ ) -> Result < ( ) , cust:: error:: CudaError > {
1216 let dims_c = ( dims_b. 0 , dims_a. 1 , 1 ) ;
1317 let size_a = dims_a. 0 * dims_a. 1 ;
1418 let h_a = LockedBuffer :: new ( & 1.0f32 , size_a) . expect ( "host array couldn't be initialized!" ) ;
@@ -19,19 +23,24 @@ fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (us
1923 let stream = Stream :: new ( StreamFlags :: NON_BLOCKING , None ) . expect ( "Stream couldn't be init!" ) ;
2024
2125 let size_c = dims_b. 0 * dims_a. 1 ;
22- let mut h_c =
23- LockedBuffer :: new ( & 0.0f32 , size_c) . expect ( "host array couldn't be initialized!" ) ;
26+ let mut h_c = LockedBuffer :: new ( & 0.0f32 , size_c) . expect ( "host array couldn't be initialized!" ) ;
2427
2528 let start_event = Event :: new ( EventFlags :: DEFAULT ) ?;
2629 let stop_event = Event :: new ( EventFlags :: DEFAULT ) ?;
2730
28- let d_a = DeviceBuffer :: from_slice ( h_a. as_slice ( ) ) . expect ( "device array couldn't be initialized!" ) ;
29- let d_b = DeviceBuffer :: from_slice ( h_b. as_slice ( ) ) . expect ( "device array couldn't be initialized!" ) ;
30- let d_c = DeviceBuffer :: from_slice ( h_c. as_slice ( ) ) . expect ( "device array couldn't be initialized!" ) ;
31-
31+ let d_a =
32+ DeviceBuffer :: from_slice ( h_a. as_slice ( ) ) . expect ( "device array couldn't be initialized!" ) ;
33+ let d_b =
34+ DeviceBuffer :: from_slice ( h_b. as_slice ( ) ) . expect ( "device array couldn't be initialized!" ) ;
35+ let d_c =
36+ DeviceBuffer :: from_slice ( h_c. as_slice ( ) ) . expect ( "device array couldn't be initialized!" ) ;
37+
3238 stream. synchronize ( ) . expect ( "Stream couldn't synchronize!" ) ;
3339 let threads = BlockSize :: xy ( block_size as u32 , block_size as u32 ) ;
34- let grid = GridSize :: xy ( ( dims_b. 0 / ( threads. x as usize ) ) . try_into ( ) . unwrap ( ) , ( dims_a. 1 / ( threads. y as usize ) ) . try_into ( ) . unwrap ( ) ) ;
40+ let grid = GridSize :: xy (
41+ ( dims_b. 0 / ( threads. x as usize ) ) . try_into ( ) . unwrap ( ) ,
42+ ( dims_a. 1 / ( threads. y as usize ) ) . try_into ( ) . unwrap ( ) ,
43+ ) ;
3544
3645 println ! ( "Computing result using CUDA Kernel..." ) ;
3746
@@ -57,7 +66,7 @@ fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (us
5766 . record ( & stream)
5867 . expect ( "Failed to record start_event in the CUDA stream!" ) ;
5968
60- const N_ITER : u32 = 300 ;
69+ const N_ITER : u32 = 300 ;
6170
6271 for _ in 0 ..N_ITER {
6372 unsafe {
@@ -75,22 +84,26 @@ fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (us
7584 . record ( & stream)
7685 . expect ( "Failed to record stop_event in the CUDA stream!" ) ;
7786
78- stop_event. synchronize ( ) . expect ( "Stream couldn't synchronize!" ) ;
87+ stop_event
88+ . synchronize ( )
89+ . expect ( "Stream couldn't synchronize!" ) ;
7990
8091 let gpu_time: u128 = stop_event
8192 . elapsed ( & start_event)
8293 . expect ( "Failed to calculate duration of GPU operations!" )
8394 . as_micros ( ) ;
8495
8596 let avg_time = gpu_time as f32 / N_ITER as f32 ;
86- println ! ( "Average time spent executing by the GPU: {} microseconds" , avg_time) ;
97+ println ! (
98+ "Average time spent executing by the GPU: {} microseconds" ,
99+ avg_time
100+ ) ;
87101 let flops_per_matrix_mul = 2.0 * ( dims_a. 0 as f32 ) * ( dims_a. 1 as f32 ) * ( dims_b. 0 as f32 ) ;
88102 let giga_flops = ( flops_per_matrix_mul / ( avg_time) ) / 1000.0 ;
89103 println ! ( "Performance = {} GFlop/s" , giga_flops) ;
90104
91- unsafe {
92- d_c
93- . async_copy_to ( & mut h_c, & stream)
105+ unsafe {
106+ d_c. async_copy_to ( & mut h_c, & stream)
94107 . expect ( "Could not copy from device to host!" ) ;
95108 }
96109 stream. synchronize ( ) . expect ( "Stream couldn't synchronize!" ) ;
@@ -108,14 +121,22 @@ fn matrix_multiply(block_size: usize, dims_a: (usize, usize, usize), dims_b: (us
108121 let rel_err = abs_err / abs_val. max ( dot_length * machine_epsilon) ;
109122
110123 if rel_err > 1e-6 {
111- println ! ( "Error at index {}: CPU = {}, GPU = {}, rel_err = {}" , i, dims_a. 0 as f32 * 0.01f32 , h_c[ i] , rel_err) ;
124+ println ! (
125+ "Error at index {}: CPU = {}, GPU = {}, rel_err = {}" ,
126+ i,
127+ dims_a. 0 as f32 * 0.01f32 ,
128+ h_c[ i] ,
129+ rel_err
130+ ) ;
112131 correct = false ;
113132 }
114133 }
115134
116135 if correct {
117136 println ! ( "Result = PASS" ) ;
118- println ! ( "NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled." ) ;
137+ println ! (
138+ "NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled."
139+ ) ;
119140 } else {
120141 println ! ( "Result = FAIL" ) ;
121142 return Err ( cust:: error:: CudaError :: UnknownError ) ;
@@ -130,7 +151,7 @@ fn main() -> Result<(), cust::error::CudaError> {
130151 let device = Device :: get_device ( 0 ) . expect ( "Couldn't find Cuda supported devices!" ) ;
131152 println ! ( "Device Name: {}" , device. name( ) . unwrap( ) ) ;
132153
133- let block_size: u32 = 32 ;
154+ let block_size: u32 = 32 ;
134155 let dims_a: ( usize , usize , usize ) = ( block_size as usize , block_size as usize , 1 ) ;
135156 let dims_b: ( usize , usize , usize ) = ( block_size as usize , block_size as usize , 1 ) ;
136157
0 commit comments