mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-21 09:09:36 +08:00
Quick temporary fix for a performance issue we just identified with
vectorization. The sum benchmark now runs 3x faster with vectorization than without.
This commit is contained in:
parent
32596c5e9e
commit
03d19f3bae
@ -194,9 +194,22 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
|
||||
// do the vectorizable part of the sum
|
||||
if(size >= packetSize)
|
||||
{
|
||||
asm("#begin");
|
||||
|
||||
PacketScalar packet_res;
|
||||
packet_res = mat.template packet<Aligned>(0, 0);
|
||||
int index;
|
||||
if(Derived::IsVectorAtCompileTime)
|
||||
{
|
||||
for(index = packetSize; index<alignedSize ; index+=packetSize)
|
||||
{
|
||||
const int row = Derived::RowsAtCompileTime==1 ? 0 : index;
|
||||
const int col = Derived::RowsAtCompileTime==1 ? index : 0;
|
||||
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(index = packetSize; index<alignedSize ; index+=packetSize)
|
||||
{
|
||||
// FIXME the following is not really efficient
|
||||
@ -204,7 +217,9 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
|
||||
const int col = rowMajor ? index%innerSize : index/innerSize;
|
||||
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
|
||||
}
|
||||
}
|
||||
res = ei_predux(packet_res);
|
||||
asm("#end");
|
||||
|
||||
// now we must do the rest without vectorization.
|
||||
if(alignedSize == size) return res;
|
||||
|
Loading…
x
Reference in New Issue
Block a user