Get rid of nested template specialization in TensorReductionGpu.h, which was broken by c6953f799b01d36f4236b64f351cc1446e0abe17.

2025-09-18 04:13:14 +08:00 · 2020-10-13 23:53:11 +00:00 · 2020-10-13 23:53:11 +00:00 · 61fc78bbda
commit 61fc78bbda
parent c6953f799b
1 changed files with 4 additions and 5 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@@ -80,8 +80,8 @@ __device__ inline double atomicExchCustom(double* address, double val) {
 }
 #ifdef EIGEN_HAS_GPU_FP16
-template <template <typename T> class R>
+template <typename R>
-__device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) {
+__device__ inline void atomicReduce(half2* output, half2 accum, R& reducer) {
  unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
  unsigned int newval = oldval;
  reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
@@ -99,9 +99,8 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer
  }
 }
 // reduction should be associative since reduction is not atomic in wide vector but atomic in half2 operations
-template <template <typename T> class R>
+template <typename R>
-__device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum,
+__device__ inline void atomicReduce(Packet4h2* output, Packet4h2 accum, R& reducer) {
-                                    R<half>& reducer) {
  half2* houtput=reinterpret_cast<half2*>(output);
  half2* haccum=reinterpret_cast<half2*>(&accum);
  for(int i=0;i<4;++i){