diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 95fc9fec6..bc06ca1f0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -229,7 +229,7 @@ inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression&
 
 /*static*/
 template<typename Expression>
-inline void TensorExecutor<Expression, GpuDevice, false>::run(const Expression& expr, const GpuDevice& device)
+inline void TensorExecutor<Expression, GpuDevice, true>::run(const Expression& expr, const GpuDevice& device)
 {
   TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
   const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index d4e88fabd..c1cdb98a4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -336,6 +336,12 @@ struct FullReducer<Self, Op, ThreadPoolDevice, true> {
 };
 #endif
 
+// Declaration only (no body here) of the __global__ FullReductionKernel template; seen only when EIGEN_USE_GPU and __CUDACC__ are both defined.
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+template <int B, int N, typename S, typename R, typename I>
+__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
+#endif
+
 }  // end namespace internal