mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
Make it possible for a vectorized tensor expression to be executed in a CUDA kernel.
This commit is contained in:
parent
4f471146fb
commit
7f1c29fb0c
@@ -50,6 +50,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
|
||||
{
|
||||
public:
|
||||
typedef typename Expression::Index Index;
|
||||
+ EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
|
||||
{
|
||||
TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
|
||||
@@ -57,7 +58,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
|
||||
if (needs_assign)
|
||||
{
|
||||
const Index size = array_prod(evaluator.dimensions());
|
||||
- static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
|
||||
+ const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
|
||||
const Index VectorizedSize = (size / PacketSize) * PacketSize;
|
||||
|
||||
for (Index i = 0; i < VectorizedSize; i += PacketSize) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user