mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 01:29:35 +08:00
Get rid of nested template specialization in TensorReductionGpu.h, which was broken by c6953f799b01d36f4236b64f351cc1446e0abe17.
This commit is contained in:
parent
c6953f799b
commit
61fc78bbda
@ -80,8 +80,8 @@ __device__ inline double atomicExchCustom(double* address, double val) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef EIGEN_HAS_GPU_FP16
|
#ifdef EIGEN_HAS_GPU_FP16
|
||||||
template <template <typename T> class R>
|
template <typename R>
|
||||||
__device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) {
|
__device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
|
||||||
unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
|
unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
|
||||||
unsigned int newval = oldval;
|
unsigned int newval = oldval;
|
||||||
reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
|
reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
|
||||||
@ -99,9 +99,8 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
|
// reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
|
||||||
template <template <typename T> class R>
|
template <typename R>
|
||||||
__device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum,
|
__device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
|
||||||
R<half>& reducer) {
|
|
||||||
half2* houtput=reinterpret_cast<half2*>(output);
|
half2* houtput=reinterpret_cast<half2*>(output);
|
||||||
half2* haccum=reinterpret_cast<half2*>(&accum);
|
half2* haccum=reinterpret_cast<half2*>(&accum);
|
||||||
for(int i=0;i<4;++i){
|
for(int i=0;i<4;++i){
|
||||||
|
Loading…
x
Reference in New Issue
Block a user