diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 885295f0a..97f4b34b3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -322,6 +322,12 @@ struct OuterReducer { template __global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); + +template +__global__ void ReductionInitKernelHalfFloat(R, const S, I, half2*); +template +__global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*); + template __global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); @@ -618,6 +624,8 @@ struct TensorEvaluator, Device> #endif #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); + template friend void internal::ReductionInitKernelHalfFloat(R, const S, I, half2*); + template friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); template friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); template friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index fd2587dd5..9186dffe4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -67,6 +67,30 @@ __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) #endif } + +template