mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Improve mixing of complex and real in the vectorized path of apply_rotation_in_the_plane
This commit is contained in:
parent
f75dfdda7e
commit
d9084ac8e1
@ -302,8 +302,12 @@ template<typename VectorX, typename VectorY, typename OtherScalar>
|
|||||||
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
|
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
|
||||||
{
|
{
|
||||||
typedef typename VectorX::Scalar Scalar;
|
typedef typename VectorX::Scalar Scalar;
|
||||||
enum { PacketSize = packet_traits<Scalar>::size };
|
enum {
|
||||||
|
PacketSize = packet_traits<Scalar>::size,
|
||||||
|
OtherPacketSize = packet_traits<OtherScalar>::size
|
||||||
|
};
|
||||||
typedef typename packet_traits<Scalar>::type Packet;
|
typedef typename packet_traits<Scalar>::type Packet;
|
||||||
|
typedef typename packet_traits<OtherScalar>::type OtherPacket;
|
||||||
eigen_assert(xpr_x.size() == xpr_y.size());
|
eigen_assert(xpr_x.size() == xpr_y.size());
|
||||||
Index size = xpr_x.size();
|
Index size = xpr_x.size();
|
||||||
Index incrx = xpr_x.derived().innerStride();
|
Index incrx = xpr_x.derived().innerStride();
|
||||||
@ -321,6 +325,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
|
|||||||
|
|
||||||
if(VectorX::SizeAtCompileTime == Dynamic &&
|
if(VectorX::SizeAtCompileTime == Dynamic &&
|
||||||
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
|
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
|
||||||
|
(PacketSize == OtherPacketSize) &&
|
||||||
((incrx==1 && incry==1) || PacketSize == 1))
|
((incrx==1 && incry==1) || PacketSize == 1))
|
||||||
{
|
{
|
||||||
// both vectors are sequentially stored in memory => vectorization
|
// both vectors are sequentially stored in memory => vectorization
|
||||||
@ -329,9 +334,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
|
|||||||
Index alignedStart = internal::first_default_aligned(y, size);
|
Index alignedStart = internal::first_default_aligned(y, size);
|
||||||
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
|
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
|
||||||
|
|
||||||
const Packet pc = pset1<Packet>(c);
|
const OtherPacket pc = pset1<OtherPacket>(c);
|
||||||
const Packet ps = pset1<Packet>(s);
|
const OtherPacket ps = pset1<OtherPacket>(s);
|
||||||
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
|
conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
|
||||||
|
conj_helper<OtherPacket,Packet,false,false> pm;
|
||||||
|
|
||||||
for(Index i=0; i<alignedStart; ++i)
|
for(Index i=0; i<alignedStart; ++i)
|
||||||
{
|
{
|
||||||
@ -350,8 +356,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
|
|||||||
{
|
{
|
||||||
Packet xi = pload<Packet>(px);
|
Packet xi = pload<Packet>(px);
|
||||||
Packet yi = pload<Packet>(py);
|
Packet yi = pload<Packet>(py);
|
||||||
pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
|
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
||||||
pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
|
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
||||||
px += PacketSize;
|
px += PacketSize;
|
||||||
py += PacketSize;
|
py += PacketSize;
|
||||||
}
|
}
|
||||||
@ -365,10 +371,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
|
|||||||
Packet xi1 = ploadu<Packet>(px+PacketSize);
|
Packet xi1 = ploadu<Packet>(px+PacketSize);
|
||||||
Packet yi = pload <Packet>(py);
|
Packet yi = pload <Packet>(py);
|
||||||
Packet yi1 = pload <Packet>(py+PacketSize);
|
Packet yi1 = pload <Packet>(py+PacketSize);
|
||||||
pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
|
pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
||||||
pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1)));
|
pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
|
||||||
pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
|
pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
||||||
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1)));
|
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
|
||||||
px += Peeling*PacketSize;
|
px += Peeling*PacketSize;
|
||||||
py += Peeling*PacketSize;
|
py += Peeling*PacketSize;
|
||||||
}
|
}
|
||||||
@ -376,8 +382,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
|
|||||||
{
|
{
|
||||||
Packet xi = ploadu<Packet>(x+peelingEnd);
|
Packet xi = ploadu<Packet>(x+peelingEnd);
|
||||||
Packet yi = pload <Packet>(y+peelingEnd);
|
Packet yi = pload <Packet>(y+peelingEnd);
|
||||||
pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
|
pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
||||||
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
|
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -393,19 +399,21 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
|
|||||||
/*** fixed-size vectorized path ***/
|
/*** fixed-size vectorized path ***/
|
||||||
else if(VectorX::SizeAtCompileTime != Dynamic &&
|
else if(VectorX::SizeAtCompileTime != Dynamic &&
|
||||||
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
|
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
|
||||||
|
(PacketSize == OtherPacketSize) &&
|
||||||
(EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
|
(EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
|
||||||
{
|
{
|
||||||
const Packet pc = pset1<Packet>(c);
|
const OtherPacket pc = pset1<OtherPacket>(c);
|
||||||
const Packet ps = pset1<Packet>(s);
|
const OtherPacket ps = pset1<OtherPacket>(s);
|
||||||
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj;
|
conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
|
||||||
|
conj_helper<OtherPacket,Packet,false,false> pm;
|
||||||
Scalar* EIGEN_RESTRICT px = x;
|
Scalar* EIGEN_RESTRICT px = x;
|
||||||
Scalar* EIGEN_RESTRICT py = y;
|
Scalar* EIGEN_RESTRICT py = y;
|
||||||
for(Index i=0; i<size; i+=PacketSize)
|
for(Index i=0; i<size; i+=PacketSize)
|
||||||
{
|
{
|
||||||
Packet xi = pload<Packet>(px);
|
Packet xi = pload<Packet>(px);
|
||||||
Packet yi = pload<Packet>(py);
|
Packet yi = pload<Packet>(py);
|
||||||
pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi)));
|
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
||||||
pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi)));
|
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
||||||
px += PacketSize;
|
px += PacketSize;
|
||||||
py += PacketSize;
|
py += PacketSize;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user