From 7f1c29fb0c26e92f31446926c441e13b0f6aec68 Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Wed, 11 Nov 2015 15:22:50 -0800
Subject: [PATCH] Make it possible for a vectorized tensor expression to be
 executed in a CUDA kernel.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index bc06ca1f0..956672771 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -50,6 +50,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
 {
  public:
   typedef typename Expression::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
   {
     TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
@@ -57,7 +58,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
     if (needs_assign)
     {
       const Index size = array_prod(evaluator.dimensions());
-      static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
+      const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
       const Index VectorizedSize = (size / PacketSize) * PacketSize;
       for (Index i = 0; i < VectorizedSize; i += PacketSize) {