Make it possible for a vectorized tensor expression to be executed in a CUDA kernel.

2025-09-27 00:33:24 +08:00 · 2015-11-11 15:22:50 -08:00 · 2015-11-11 15:22:50 -08:00 · 7f1c29fb0c
commit 7f1c29fb0c
parent 4f471146fb
1 changed file with 2 additions and 1 deletion
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -50,6 +50,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
 {
 public:
  typedef typename Expression::Index Index;
+  EIGEN_DEVICE_FUNC
  static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
  {
    TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
@@ -57,7 +58,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
    if (needs_assign)
    {
      const Index size = array_prod(evaluator.dimensions());
-      static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
+      const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
      const Index VectorizedSize = (size / PacketSize) * PacketSize;

      for (Index i = 0; i < VectorizedSize; i += PacketSize) {