mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-24 02:29:33 +08:00
Make use of ei_pmadd in the dot product: this will further improve performance
on architectures that have a packed multiply-add assembly instruction.
This commit is contained in:
parent
3b94436d2f
commit
5b0da4b778
@ -108,8 +108,9 @@ struct ei_dot_vec_unroller
|
|||||||
|
|
||||||
inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
|
inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
|
||||||
{
|
{
|
||||||
return ei_padd(
|
return ei_pmadd(
|
||||||
ei_pmul(v1.template packet<Aligned>(row1, col1), v2.template packet<Aligned>(row2, col2)),
|
v1.template packet<Aligned>(row1, col1),
|
||||||
|
v2.template packet<Aligned>(row2, col2),
|
||||||
ei_dot_vec_unroller<Derived1, Derived2, Index+ei_packet_traits<Scalar>::size, Stop>::run(v1, v2)
|
ei_dot_vec_unroller<Derived1, Derived2, Index+ei_packet_traits<Scalar>::size, Stop>::run(v1, v2)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -192,12 +193,10 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
|||||||
const int col1 = rowVector1 ? index : 0;
|
const int col1 = rowVector1 ? index : 0;
|
||||||
const int row2 = rowVector2 ? 0 : index;
|
const int row2 = rowVector2 ? 0 : index;
|
||||||
const int col2 = rowVector2 ? index : 0;
|
const int col2 = rowVector2 ? index : 0;
|
||||||
packet_res = ei_padd(
|
packet_res = ei_pmadd(
|
||||||
packet_res,
|
v1.template packet<Aligned>(row1, col1),
|
||||||
ei_pmul(
|
v2.template packet<Aligned>(row2, col2),
|
||||||
v1.template packet<Aligned>(row1, col1),
|
packet_res
|
||||||
v2.template packet<Aligned>(row2, col2)
|
|
||||||
)
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
res = ei_predux(packet_res);
|
res = ei_predux(packet_res);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user