We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4cdb50b, commit 0ead512 (Copy full SHA for 0ead512)
src/device/execution.jl
@@ -95,7 +95,8 @@ end
95
function launch_configuration(backend::AbstractGPUBackend, heuristic;
96
elements::Int, elements_per_thread::Int)
97
threads = clamp(elements, 1, heuristic.threads)
98
- blocks = max(cld(elements, threads), 1)
+ blocks = max(cld(elements, threads), heuristic.blocks)
99
+ threads = cld(elements, blocks)
100
101
if elements_per_thread > 1 && blocks > heuristic.blocks
102
# we want to launch more blocks than required, so prefer a grid-stride loop instead
0 commit comments