mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Quick temporary fix for a performance issue we just identified with
vectorization: the sum benchmark now runs 3x faster with vectorization than without.
This commit is contained in:
parent
32596c5e9e
commit
03d19f3bae
@ -194,17 +194,32 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
|
|||||||
// do the vectorizable part of the sum
|
// do the vectorizable part of the sum
|
||||||
if(size >= packetSize)
|
if(size >= packetSize)
|
||||||
{
|
{
|
||||||
|
asm("#begin");
|
||||||
|
|
||||||
PacketScalar packet_res;
|
PacketScalar packet_res;
|
||||||
packet_res = mat.template packet<Aligned>(0, 0);
|
packet_res = mat.template packet<Aligned>(0, 0);
|
||||||
int index;
|
int index;
|
||||||
for(index = packetSize; index<alignedSize ; index+=packetSize)
|
if(Derived::IsVectorAtCompileTime)
|
||||||
{
|
{
|
||||||
// FIXME the following is not really efficient
|
for(index = packetSize; index<alignedSize ; index+=packetSize)
|
||||||
const int row = rowMajor ? index/innerSize : index%innerSize;
|
{
|
||||||
const int col = rowMajor ? index%innerSize : index/innerSize;
|
const int row = Derived::RowsAtCompileTime==1 ? 0 : index;
|
||||||
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
|
const int col = Derived::RowsAtCompileTime==1 ? index : 0;
|
||||||
|
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(index = packetSize; index<alignedSize ; index+=packetSize)
|
||||||
|
{
|
||||||
|
// FIXME the following is not really efficient
|
||||||
|
const int row = rowMajor ? index/innerSize : index%innerSize;
|
||||||
|
const int col = rowMajor ? index%innerSize : index/innerSize;
|
||||||
|
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
res = ei_predux(packet_res);
|
res = ei_predux(packet_res);
|
||||||
|
asm("#end");
|
||||||
|
|
||||||
// now we must do the rest without vectorization.
|
// now we must do the rest without vectorization.
|
||||||
if(alignedSize == size) return res;
|
if(alignedSize == size) return res;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user