// deviceFunctions.cu
#include <stdlib.h>
#include <stdio.h>
// Device helper: returns the calling thread's flat index along x,
// i.e. blockIdx.x * blockDim.x + threadIdx.x. Only the x components
// are consulted, so this is meaningful for 1D launches.
__device__ int get_global_index(void)
{
  const unsigned int block_offset = blockIdx.x * blockDim.x;
  const unsigned int global_index = block_offset + threadIdx.x;
  return global_index;
}
// Device helper: always returns the fixed value 7.
__device__ int get_constant(void)
{
  const int constant_value = 7;
  return constant_value;
}
// Kernel: every thread stores get_constant() into its own slot of `array`,
// where the slot is chosen by get_global_index().
//
// There is no bounds check: `array` must hold at least as many ints as
// the total number of threads launched.
__global__ void kernel1(int *array)
{
  const int slot = get_global_index();
  const int value = get_constant();
  array[slot] = value;
}
// Kernel: every thread stores its own get_global_index() value into the
// slot of `array` selected by that same index.
//
// There is no bounds check: `array` must hold at least as many ints as
// the total number of threads launched.
__global__ void kernel2(int *array)
{
  const int slot = get_global_index();
  array[slot] = slot;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what)
{
  if(status == cudaSuccess)
  {
    return true;
  }
  fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
  return false;
}

// Prints `title` on its own line, then `count` ints from `values`
// separated by spaces, then a blank line.
static void print_results(const char *title, const int *values, int count)
{
  printf("%s\n", title);
  for(int i = 0; i < count; ++i)
  {
    printf("%d ", values[i]);
  }
  printf("\n\n");
}

// Host driver: allocates a 256-int buffer on the host and on the device,
// runs kernel1 and then kernel2 over the device buffer, and prints the
// contents copied back after each kernel.
//
// Returns EXIT_SUCCESS (0) when every step succeeded. If a host allocation
// or any CUDA call fails, a message is written to stderr and EXIT_FAILURE
// is returned; buffers that were successfully allocated are released.
int main(void)
{
  const int num_elements = 256;
  const int block_size = 128;
  // kernel1/kernel2 write array[index] without a bounds check, so the launch
  // must create exactly num_elements threads: num_elements has to remain a
  // multiple of block_size for this plain division to be correct.
  const int grid_size = num_elements / block_size;
  const size_t num_bytes = num_elements * sizeof(int);

  int exit_code = EXIT_FAILURE;

  // allocate host memory
  int *host_array = (int*)malloc(num_bytes);
  if(host_array == 0)
  {
    fprintf(stderr, "failed to allocate %lu bytes of host memory\n", (unsigned long)num_bytes);
    return EXIT_FAILURE;
  }

  // allocate device memory
  int *device_array = 0;
  if(cuda_ok(cudaMalloc((void**)&device_array, num_bytes), "cudaMalloc"))
  {
    // launch kernel1 and inspect its results; cudaGetLastError() catches a
    // rejected launch, and the blocking cudaMemcpy reports errors raised
    // while the kernel was executing
    kernel1<<<grid_size,block_size>>>(device_array);
    if(cuda_ok(cudaGetLastError(), "kernel1 launch") &&
       cuda_ok(cudaMemcpy(host_array, device_array, num_bytes, cudaMemcpyDeviceToHost),
               "copy of kernel1 results"))
    {
      print_results("kernel1 results:", host_array, num_elements);

      // launch kernel2 and inspect its results
      kernel2<<<grid_size,block_size>>>(device_array);
      if(cuda_ok(cudaGetLastError(), "kernel2 launch") &&
         cuda_ok(cudaMemcpy(host_array, device_array, num_bytes, cudaMemcpyDeviceToHost),
                 "copy of kernel2 results"))
      {
        print_results("kernel2 results:", host_array, num_elements);
        exit_code = EXIT_SUCCESS;
      }
    }

    // deallocate device memory
    if(!cuda_ok(cudaFree(device_array), "cudaFree"))
    {
      exit_code = EXIT_FAILURE;
    }
  }

  // deallocate host memory
  free(host_array);
  return exit_code;
}