Fixed CUDA compilation errors

2025-07-06 05:05:12 +08:00 · 2015-11-11 14:36:33 -08:00 · 2015-11-11 14:36:33 -08:00 · 5cb18e5b5e
commit 5cb18e5b5e
parent 228edfe616
2 changed files with 7 additions and 1 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -229,7 +229,7 @@ inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression&

 /*static*/
 template<typename Expression>
-inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
+inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
 {
  TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
  const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -336,6 +336,12 @@ struct FullReducer<Self, Op, ThreadPoolDevice, true> {
 };
 #endif

+
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+template <int B, int N, typename S, typename R, typename I>
+__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
+#endif
+
 }  // end namespace internal