From c36c09169e1545e287293f3f145fa5a25b47b84a Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Wed, 24 Feb 2016 17:07:25 -0800
Subject: [PATCH] Fixed a typo in the reduction code that could prevent large
 full reductions from running properly on old CUDA devices.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 22aea5ea4..f7c1a5cf4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -515,7 +515,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
     // Use the FullReducer if possible.
     if (RunningFullReduction && internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation &&
         ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) ||
-         (internal::array_prod(m_impl.dimensions()) > 1024 * 1024))) {
+         (!RunningOnGPU && (internal::array_prod(m_impl.dimensions()) > 1024 * 1024)))) {
 
       bool need_assign = false;
       if (!data) {