mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-04 04:05:19 +08:00
Small improvement to the full reduction of fp16
This commit is contained in:
parent
0b9e3dcd06
commit
0eb69b7552
@ -193,16 +193,18 @@ static __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
if (gridDim.x == 1 && first_index == 0) {
|
if (gridDim.x == 1 && first_index == 0) {
|
||||||
reducer.reduce(__low2half(*scratch), output);
|
half tmp = __low2half(*scratch);
|
||||||
reducer.reduce(__high2half(*scratch), output);
|
reducer.reduce(__high2half(*scratch), &tmp);
|
||||||
|
*output = tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op>
|
template <typename Op>
|
||||||
__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) {
|
__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) {
|
||||||
eigen_assert(threadIdx.x == 1);
|
eigen_assert(threadIdx.x == 1);
|
||||||
reducer.reduce(__low2half(*scratch), output);
|
half tmp = __low2half(*scratch);
|
||||||
reducer.reduce(__high2half(*scratch), output);
|
reducer.reduce(__high2half(*scratch), &tmp);
|
||||||
|
*output = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user