Quick temporary fix for a performance issue we just identified with vectorization.

The sum benchmark now runs 3x faster with vectorization than without.
This commit is contained in:
Benoit Jacob 2008-06-23 11:23:05 +00:00
parent 32596c5e9e
commit 03d19f3bae

View File

@ -194,9 +194,22 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
// do the vectorizable part of the sum
if(size >= packetSize)
{
asm("#begin");
PacketScalar packet_res;
packet_res = mat.template packet<Aligned>(0, 0);
int index;
if(Derived::IsVectorAtCompileTime)
{
for(index = packetSize; index<alignedSize ; index+=packetSize)
{
const int row = Derived::RowsAtCompileTime==1 ? 0 : index;
const int col = Derived::RowsAtCompileTime==1 ? index : 0;
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
}
}
else
{
for(index = packetSize; index<alignedSize ; index+=packetSize)
{
// FIXME the following is not really efficient
@ -204,7 +217,9 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
const int col = rowMajor ? index%innerSize : index/innerSize;
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
}
}
res = ei_predux(packet_res);
asm("#end");
// now we must do the rest without vectorization.
if(alignedSize == size) return res;