mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-18 23:57:39 +08:00
Improved the performance of large outer reductions on CUDA
This commit is contained in:
parent
56a3ada670
commit
68ac5c1738
@@ -281,7 +281,7 @@ __global__ void OuterReductionKernel(Reducer reducer, const Self input, Index nu
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Do the reduction.
|
// Do the reduction.
|
||||||
const Index max_iter = num_preserved_coeffs * numext::maxi<Index>(1, (num_coeffs_to_reduce - NumPerThread + 1));
|
const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
|
||||||
for (Index i = thread_id; i < max_iter; i += num_threads) {
|
for (Index i = thread_id; i < max_iter; i += num_threads) {
|
||||||
const Index input_col = i % num_preserved_coeffs;
|
const Index input_col = i % num_preserved_coeffs;
|
||||||
const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
|
const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user