Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-12 11:49:02 +08:00).
Make it possible for a vectorized tensor expression to be executed in a CUDA kernel.
This commit is contained in:
Parent: 4f471146fb
Commit: 7f1c29fb0c
@ -50,6 +50,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef typename Expression::Index Index;
|
typedef typename Expression::Index Index;
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
|
static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
|
||||||
{
|
{
|
||||||
TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
|
TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
|
||||||
@ -57,7 +58,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
|
|||||||
if (needs_assign)
|
if (needs_assign)
|
||||||
{
|
{
|
||||||
const Index size = array_prod(evaluator.dimensions());
|
const Index size = array_prod(evaluator.dimensions());
|
||||||
static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
|
const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
|
||||||
const Index VectorizedSize = (size / PacketSize) * PacketSize;
|
const Index VectorizedSize = (size / PacketSize) * PacketSize;
|
||||||
|
|
||||||
for (Index i = 0; i < VectorizedSize; i += PacketSize) {
|
for (Index i = 0; i < VectorizedSize; i += PacketSize) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user