mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-03 01:04:23 +08:00
Fixed the evaluation of expressions involving tensors of 2 or 3 elements on CUDA devices.
This commit is contained in:
parent
1d3c8306f8
commit
b33cf92878
@@ -168,11 +168,10 @@ __launch_bounds__(1024)
|
|||||||
const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
|
const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
|
||||||
const Index vectorized_step_size = step_size * PacketSize;
|
const Index vectorized_step_size = step_size * PacketSize;
|
||||||
const Index vectorized_size = (size / PacketSize) * PacketSize;
|
const Index vectorized_size = (size / PacketSize) * PacketSize;
|
||||||
Index i = first_index * PacketSize;
|
for (Index i = first_index * PacketSize; i < vectorized_size; i += vectorized_step_size) {
|
||||||
for ( ; i < vectorized_size; i += vectorized_step_size) {
|
|
||||||
eval.evalPacket(i);
|
eval.evalPacket(i);
|
||||||
}
|
}
|
||||||
for ( ; i < size; i += step_size) {
|
for (Index i = vectorized_size + first_index; i < size; i += step_size) {
|
||||||
eval.evalScalar(i);
|
eval.evalScalar(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user