mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-03 03:35:11 +08:00
Small improvement to the full reduction of fp16
This commit is contained in:
parent
0b9e3dcd06
commit
0eb69b7552
@ -193,16 +193,18 @@ static __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self
|
||||
__syncthreads();
|
||||
|
||||
if (gridDim.x == 1 && first_index == 0) {
|
||||
reducer.reduce(__low2half(*scratch), output);
|
||||
reducer.reduce(__high2half(*scratch), output);
|
||||
half tmp = __low2half(*scratch);
|
||||
reducer.reduce(__high2half(*scratch), &tmp);
|
||||
*output = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Op>
|
||||
__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) {
|
||||
eigen_assert(threadIdx.x == 1);
|
||||
reducer.reduce(__low2half(*scratch), output);
|
||||
reducer.reduce(__high2half(*scratch), output);
|
||||
half tmp = __low2half(*scratch);
|
||||
reducer.reduce(__high2half(*scratch), &tmp);
|
||||
*output = tmp;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user