|
26 | 26 | #include "kernel.cuh" |
27 | 27 |
|
/**
 * Launch wrapper that runs jamcrc_kernel() on the device and stores the
 * checksum in *result.
 *
 * Only the thread whose flattened global id is 0 performs the computation and
 * the write; every other thread in the launch exits without doing anything.
 *
 * @param data           Buffer handed to jamcrc_kernel().
 * @param result         Destination for the computed checksum.
 * @param length         Number of bytes of `data` to process.
 * @param previousCrc32  Seed value forwarded to jamcrc_kernel().
 */
__global__ void jamcrc_kernel_wrapper(const void *data, uint32_t *result, const uint64_t length, const uint32_t previousCrc32) {
    // blockIdx / gridDim / blockDim components are 32-bit unsigned values.
    // Widen to 64 bits *before* multiplying so that large grids cannot wrap
    // around and make a thread other than the first one alias to id 0.
    const uint64_t blockId =
        (static_cast<uint64_t>(blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
    const uint64_t id = blockId * blockDim.x + threadIdx.x;

    if (id != 0) {
        return;
    }

    *result = jamcrc_kernel(data, length, previousCrc32);
}
37 | 37 |
|
/**
 * Table-driven checksum over `length` bytes of `data`, processed one byte at
 * a time through crc32_lookup.
 *
 * The running value starts as the bitwise complement of `previousCrc32` and
 * is returned as-is after the last byte (no final complement is applied).
 *
 * @param data           Start of the byte sequence to checksum.
 * @param length         Number of bytes to process; 0 returns ~previousCrc32.
 * @param previousCrc32  Seed value; it is complemented before use.
 * @return               The resulting 32-bit checksum.
 */
__device__ uint32_t jamcrc_kernel(const void *data, uint64_t length, const uint32_t previousCrc32) {
    const uint8_t *bytes = static_cast<const uint8_t *>(data);
    uint32_t crc = ~previousCrc32;

    for (uint64_t pos = 0; pos < length; ++pos) {
        // Low byte of the running value XOR the next input byte selects the table entry.
        const uint32_t tableIndex = (crc & 0xFF) ^ bytes[pos];
        crc = (crc >> 8) ^ crc32_lookup[tableIndex];
    }

    return crc;
}
45 | 45 |
|
/**
 * Search kernel: each thread handles one candidate index in [a, b].
 *
 * A thread turns its index into a byte string with find_string_inv_kernel(),
 * checksums it with jamcrc_kernel() (seed 0) and compares the checksum with
 * the first 87 entries of cheat_list. On a match it reserves an output slot
 * through an atomic increment of *array_index and stores the checksum and the
 * candidate index in that slot.
 *
 * @param crc_result    Output array receiving the matching checksums.
 * @param index_result  Output array receiving the matching candidate indices.
 * @param array_size    Capacity (element count) of both output arrays.
 * @param array_index   Counter of reserved output slots, shared by all threads.
 *                      It is incremented for every match, even when the
 *                      outputs are already full, so its final value can
 *                      exceed array_size.
 * @param a             First candidate index (inclusive).
 * @param b             Last candidate index (inclusive).
 */
__global__ void runner_kernel(uint32_t *crc_result, uint64_t *index_result, uint64_t array_size, uint32_t* array_index, uint64_t a, uint64_t b) {
    // blockIdx / gridDim / blockDim components are 32-bit unsigned values.
    // Widen to 64 bits *before* multiplying; otherwise large grids wrap and
    // several threads end up testing the same candidate while others are skipped.
    const uint64_t blockId =
        (static_cast<uint64_t>(blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
    const uint64_t id = blockId * blockDim.x + threadIdx.x + a;

    // `id < a` can only be true if adding the offset wrapped around 2^64.
    if (id < a || id > b) {
        return;
    }

    // Scratch buffer for the candidate string, zero-initialised.
    uint8_t array[29] = {0};
    uint64_t size = 0;

    // Generate the candidate string from the index.
    find_string_inv_kernel(array, id, &size);

    // Checksum of the candidate (terminator excluded: `size` is its position).
    const uint32_t result = jamcrc_kernel(array, size, 0);

    bool found = false;
    for (uint8_t i = 0; i < 87; i++) {
        if (result == cheat_list[i]) {
            found = true;
            break;
        }
    }

    if (!found) {
        return;
    }

    // The atomic increment hands every matching thread a distinct slot, so the
    // two plain stores below cannot collide with another thread's stores.
    const uint32_t slot = atomicAdd(array_index, 1u);
    if (slot >= array_size) {
        return;  // Output arrays are full; this match is dropped.
    }

    crc_result[slot] = result;
    index_result[slot] = id;
}
91 | 87 |
|
/**
 * Converts the index `n` into a NUL-terminated string built from the `alpha`
 * table and writes it into `array`.
 *
 * - n < 26: a single character, alpha[n].
 * - n >= 26: repeatedly emits alpha[(n - 1) % 26] and continues with
 *   (n - 1) / 26 until the value reaches 0, so the least-significant
 *   character is stored first.
 *
 * NOTE(review): the two paths use different offsets, so n = 25 and n = 26
 * both produce the single character alpha[25]. Left unchanged here because
 * other code may depend on this exact mapping.
 *
 * @param array             Output buffer; must have room for the characters
 *                          plus the terminating '\0'.
 * @param n                 Index to convert.
 * @param terminator_index  Receives the position of the '\0', i.e. the number
 *                          of characters written before it.
 */
__device__ void find_string_inv_kernel(uint8_t *array, uint64_t n, uint64_t *terminator_index) {
    uint64_t length = 0;

    if (n < 26) {
        // Single-character case: direct table lookup.
        array[length++] = alpha[n];
    } else {
        // n >= 26 here, so the loop body always runs at least once.
        do {
            const uint64_t shifted = n - 1;
            array[length++] = alpha[shifted % 26];
            n = shifted / 26;
        } while (n > 0);
    }

    array[length] = '\0';
    *terminator_index = length;
}
0 commit comments