mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Implement a true compile-time "if" for apply_rotation_in_the_plane. This fixes a compilation failure on targets where real scalar types are vectorized but complex types are not (e.g. AVX512).
This commit is contained in:
parent
80142362ac
commit
b35d1ce4a5
@ -309,16 +309,144 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
|
|||||||
}
|
}
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
template<typename Scalar, typename OtherScalar,
|
||||||
|
int SizeAtCompileTime, int MinAlignment, bool Vectorizable>
|
||||||
|
struct apply_rotation_in_the_plane_selector
|
||||||
|
{
|
||||||
|
static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
|
||||||
|
{
|
||||||
|
for(Index i=0; i<size; ++i)
|
||||||
|
{
|
||||||
|
Scalar xi = *x;
|
||||||
|
Scalar yi = *y;
|
||||||
|
*x = c * xi + numext::conj(s) * yi;
|
||||||
|
*y = -s * xi + numext::conj(c) * yi;
|
||||||
|
x += incrx;
|
||||||
|
y += incry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Scalar, typename OtherScalar,
|
||||||
|
int SizeAtCompileTime, int MinAlignment>
|
||||||
|
struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>
|
||||||
|
{
|
||||||
|
static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
|
||||||
|
{
|
||||||
|
enum {
|
||||||
|
PacketSize = packet_traits<Scalar>::size,
|
||||||
|
OtherPacketSize = packet_traits<OtherScalar>::size
|
||||||
|
};
|
||||||
|
typedef typename packet_traits<Scalar>::type Packet;
|
||||||
|
typedef typename packet_traits<OtherScalar>::type OtherPacket;
|
||||||
|
|
||||||
|
/*** dynamic-size vectorized paths ***/
|
||||||
|
if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))
|
||||||
|
{
|
||||||
|
// both vectors are sequentially stored in memory => vectorization
|
||||||
|
enum { Peeling = 2 };
|
||||||
|
|
||||||
|
Index alignedStart = internal::first_default_aligned(y, size);
|
||||||
|
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
|
||||||
|
|
||||||
|
const OtherPacket pc = pset1<OtherPacket>(c);
|
||||||
|
const OtherPacket ps = pset1<OtherPacket>(s);
|
||||||
|
conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
|
||||||
|
conj_helper<OtherPacket,Packet,false,false> pm;
|
||||||
|
|
||||||
|
for(Index i=0; i<alignedStart; ++i)
|
||||||
|
{
|
||||||
|
Scalar xi = x[i];
|
||||||
|
Scalar yi = y[i];
|
||||||
|
x[i] = c * xi + numext::conj(s) * yi;
|
||||||
|
y[i] = -s * xi + numext::conj(c) * yi;
|
||||||
|
}
|
||||||
|
|
||||||
|
Scalar* EIGEN_RESTRICT px = x + alignedStart;
|
||||||
|
Scalar* EIGEN_RESTRICT py = y + alignedStart;
|
||||||
|
|
||||||
|
if(internal::first_default_aligned(x, size)==alignedStart)
|
||||||
|
{
|
||||||
|
for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
|
||||||
|
{
|
||||||
|
Packet xi = pload<Packet>(px);
|
||||||
|
Packet yi = pload<Packet>(py);
|
||||||
|
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
||||||
|
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
||||||
|
px += PacketSize;
|
||||||
|
py += PacketSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
|
||||||
|
for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
|
||||||
|
{
|
||||||
|
Packet xi = ploadu<Packet>(px);
|
||||||
|
Packet xi1 = ploadu<Packet>(px+PacketSize);
|
||||||
|
Packet yi = pload <Packet>(py);
|
||||||
|
Packet yi1 = pload <Packet>(py+PacketSize);
|
||||||
|
pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
||||||
|
pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
|
||||||
|
pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
||||||
|
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
|
||||||
|
px += Peeling*PacketSize;
|
||||||
|
py += Peeling*PacketSize;
|
||||||
|
}
|
||||||
|
if(alignedEnd!=peelingEnd)
|
||||||
|
{
|
||||||
|
Packet xi = ploadu<Packet>(x+peelingEnd);
|
||||||
|
Packet yi = pload <Packet>(y+peelingEnd);
|
||||||
|
pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
||||||
|
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(Index i=alignedEnd; i<size; ++i)
|
||||||
|
{
|
||||||
|
Scalar xi = x[i];
|
||||||
|
Scalar yi = y[i];
|
||||||
|
x[i] = c * xi + numext::conj(s) * yi;
|
||||||
|
y[i] = -s * xi + numext::conj(c) * yi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*** fixed-size vectorized path ***/
|
||||||
|
else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment
|
||||||
|
{
|
||||||
|
const OtherPacket pc = pset1<OtherPacket>(c);
|
||||||
|
const OtherPacket ps = pset1<OtherPacket>(s);
|
||||||
|
conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
|
||||||
|
conj_helper<OtherPacket,Packet,false,false> pm;
|
||||||
|
Scalar* EIGEN_RESTRICT px = x;
|
||||||
|
Scalar* EIGEN_RESTRICT py = y;
|
||||||
|
for(Index i=0; i<size; i+=PacketSize)
|
||||||
|
{
|
||||||
|
Packet xi = pload<Packet>(px);
|
||||||
|
Packet yi = pload<Packet>(py);
|
||||||
|
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
||||||
|
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
||||||
|
px += PacketSize;
|
||||||
|
py += PacketSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*** non-vectorized path ***/
|
||||||
|
else
|
||||||
|
{
|
||||||
|
apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<typename VectorX, typename VectorY, typename OtherScalar>
|
template<typename VectorX, typename VectorY, typename OtherScalar>
|
||||||
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
|
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
|
||||||
{
|
{
|
||||||
typedef typename VectorX::Scalar Scalar;
|
typedef typename VectorX::Scalar Scalar;
|
||||||
enum {
|
const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit)
|
||||||
PacketSize = packet_traits<Scalar>::size,
|
&& (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));
|
||||||
OtherPacketSize = packet_traits<OtherScalar>::size
|
|
||||||
};
|
|
||||||
typedef typename packet_traits<Scalar>::type Packet;
|
|
||||||
typedef typename packet_traits<OtherScalar>::type OtherPacket;
|
|
||||||
eigen_assert(xpr_x.size() == xpr_y.size());
|
eigen_assert(xpr_x.size() == xpr_y.size());
|
||||||
Index size = xpr_x.size();
|
Index size = xpr_x.size();
|
||||||
Index incrx = xpr_x.derived().innerStride();
|
Index incrx = xpr_x.derived().innerStride();
|
||||||
@ -332,117 +460,11 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
|
|||||||
if (c==OtherScalar(1) && s==OtherScalar(0))
|
if (c==OtherScalar(1) && s==OtherScalar(0))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*** dynamic-size vectorized paths ***/
|
apply_rotation_in_the_plane_selector<
|
||||||
|
Scalar,OtherScalar,
|
||||||
if(VectorX::SizeAtCompileTime == Dynamic &&
|
VectorX::SizeAtCompileTime,
|
||||||
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
|
EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),
|
||||||
(PacketSize == OtherPacketSize) &&
|
Vectorizable>::run(x,incrx,y,incry,size,c,s);
|
||||||
((incrx==1 && incry==1) || PacketSize == 1))
|
|
||||||
{
|
|
||||||
// both vectors are sequentially stored in memory => vectorization
|
|
||||||
enum { Peeling = 2 };
|
|
||||||
|
|
||||||
Index alignedStart = internal::first_default_aligned(y, size);
|
|
||||||
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
|
|
||||||
|
|
||||||
const OtherPacket pc = pset1<OtherPacket>(c);
|
|
||||||
const OtherPacket ps = pset1<OtherPacket>(s);
|
|
||||||
conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
|
|
||||||
conj_helper<OtherPacket,Packet,false,false> pm;
|
|
||||||
|
|
||||||
for(Index i=0; i<alignedStart; ++i)
|
|
||||||
{
|
|
||||||
Scalar xi = x[i];
|
|
||||||
Scalar yi = y[i];
|
|
||||||
x[i] = c * xi + numext::conj(s) * yi;
|
|
||||||
y[i] = -s * xi + numext::conj(c) * yi;
|
|
||||||
}
|
|
||||||
|
|
||||||
Scalar* EIGEN_RESTRICT px = x + alignedStart;
|
|
||||||
Scalar* EIGEN_RESTRICT py = y + alignedStart;
|
|
||||||
|
|
||||||
if(internal::first_default_aligned(x, size)==alignedStart)
|
|
||||||
{
|
|
||||||
for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
|
|
||||||
{
|
|
||||||
Packet xi = pload<Packet>(px);
|
|
||||||
Packet yi = pload<Packet>(py);
|
|
||||||
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
|
||||||
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
|
||||||
px += PacketSize;
|
|
||||||
py += PacketSize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
|
|
||||||
for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
|
|
||||||
{
|
|
||||||
Packet xi = ploadu<Packet>(px);
|
|
||||||
Packet xi1 = ploadu<Packet>(px+PacketSize);
|
|
||||||
Packet yi = pload <Packet>(py);
|
|
||||||
Packet yi1 = pload <Packet>(py+PacketSize);
|
|
||||||
pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
|
||||||
pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
|
|
||||||
pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
|
||||||
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
|
|
||||||
px += Peeling*PacketSize;
|
|
||||||
py += Peeling*PacketSize;
|
|
||||||
}
|
|
||||||
if(alignedEnd!=peelingEnd)
|
|
||||||
{
|
|
||||||
Packet xi = ploadu<Packet>(x+peelingEnd);
|
|
||||||
Packet yi = pload <Packet>(y+peelingEnd);
|
|
||||||
pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
|
||||||
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for(Index i=alignedEnd; i<size; ++i)
|
|
||||||
{
|
|
||||||
Scalar xi = x[i];
|
|
||||||
Scalar yi = y[i];
|
|
||||||
x[i] = c * xi + numext::conj(s) * yi;
|
|
||||||
y[i] = -s * xi + numext::conj(c) * yi;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*** fixed-size vectorized path ***/
|
|
||||||
else if(VectorX::SizeAtCompileTime != Dynamic &&
|
|
||||||
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
|
|
||||||
(PacketSize == OtherPacketSize) &&
|
|
||||||
(EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
|
|
||||||
{
|
|
||||||
const OtherPacket pc = pset1<OtherPacket>(c);
|
|
||||||
const OtherPacket ps = pset1<OtherPacket>(s);
|
|
||||||
conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
|
|
||||||
conj_helper<OtherPacket,Packet,false,false> pm;
|
|
||||||
Scalar* EIGEN_RESTRICT px = x;
|
|
||||||
Scalar* EIGEN_RESTRICT py = y;
|
|
||||||
for(Index i=0; i<size; i+=PacketSize)
|
|
||||||
{
|
|
||||||
Packet xi = pload<Packet>(px);
|
|
||||||
Packet yi = pload<Packet>(py);
|
|
||||||
pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
|
|
||||||
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
|
|
||||||
px += PacketSize;
|
|
||||||
py += PacketSize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*** non-vectorized path ***/
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for(Index i=0; i<size; ++i)
|
|
||||||
{
|
|
||||||
Scalar xi = *x;
|
|
||||||
Scalar yi = *y;
|
|
||||||
*x = c * xi + numext::conj(s) * yi;
|
|
||||||
*y = -s * xi + numext::conj(c) * yi;
|
|
||||||
x += incrx;
|
|
||||||
y += incry;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
Loading…
x
Reference in New Issue
Block a user