mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-10-22 13:01:06 +08:00
Optimize ApplyOnTheRight for Jacobi rotations when FMA is available
This commit is contained in:
parent
3bd0bfe0e0
commit
565db1c603
@ -17,6 +17,9 @@
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const { \
|
||||
return padd(c, this->pmul(x, y)); \
|
||||
} \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmsub(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const { \
|
||||
return psub(this->pmul(x, y), c); \
|
||||
} \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const { \
|
||||
return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); \
|
||||
} \
|
||||
@ -27,6 +30,9 @@
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const { \
|
||||
return padd(c, this->pmul(x, y)); \
|
||||
} \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmsub(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const { \
|
||||
return psub(this->pmul(x, y), c); \
|
||||
} \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const { \
|
||||
return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); \
|
||||
} \
|
||||
@ -76,6 +82,11 @@ struct conj_helper {
|
||||
return this->pmul(x, y) + c;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType pmsub(const LhsType& x, const RhsType& y,
|
||||
const ResultType& c) const {
|
||||
return this->pmul(x, y) - c;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType pmul(const LhsType& x, const RhsType& y) const {
|
||||
return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y);
|
||||
}
|
||||
@ -104,6 +115,10 @@ struct conj_helper<Packet, Packet, ConjLhs, ConjRhs> {
|
||||
return Eigen::internal::pmadd(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y), c);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmsub(const Packet& x, const Packet& y, const Packet& c) const {
|
||||
return Eigen::internal::pmsub(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y), c);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmul(const Packet& x, const Packet& y) const {
|
||||
return Eigen::internal::pmul(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y));
|
||||
}
|
||||
@ -116,6 +131,9 @@ struct conj_helper<Packet, Packet, true, true> {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmadd(const Packet& x, const Packet& y, const Packet& c) const {
|
||||
return Eigen::internal::pmadd(pconj(x), pconj(y), c);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmsub(const Packet& x, const Packet& y, const Packet& c) const {
|
||||
return Eigen::internal::pmsub(pconj(x), pconj(y), c);
|
||||
}
|
||||
// We save a conjuation by using the identity conj(a)*conj(b) = conj(a*b).
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pmul(const Packet& x, const Packet& y) const {
|
||||
return pconj(Eigen::internal::pmul(x, y));
|
||||
|
@ -335,8 +335,8 @@ struct apply_rotation_in_the_plane_selector<Scalar, OtherScalar, SizeAtCompileTi
|
||||
for (Index i = alignedStart; i < alignedEnd; i += PacketSize) {
|
||||
Packet xi = pload<Packet>(px);
|
||||
Packet yi = pload<Packet>(py);
|
||||
pstore(px, padd(pm.pmul(pc, xi), pcj.pmul(ps, yi)));
|
||||
pstore(py, psub(pcj.pmul(pc, yi), pm.pmul(ps, xi)));
|
||||
pstore(px, pm.pmadd(pc, xi, pcj.pmul(ps, yi)));
|
||||
pstore(py, pcj.pmsub(pc, yi, pm.pmul(ps, xi)));
|
||||
px += PacketSize;
|
||||
py += PacketSize;
|
||||
}
|
||||
@ -347,18 +347,18 @@ struct apply_rotation_in_the_plane_selector<Scalar, OtherScalar, SizeAtCompileTi
|
||||
Packet xi1 = ploadu<Packet>(px + PacketSize);
|
||||
Packet yi = pload<Packet>(py);
|
||||
Packet yi1 = pload<Packet>(py + PacketSize);
|
||||
pstoreu(px, padd(pm.pmul(pc, xi), pcj.pmul(ps, yi)));
|
||||
pstoreu(px + PacketSize, padd(pm.pmul(pc, xi1), pcj.pmul(ps, yi1)));
|
||||
pstore(py, psub(pcj.pmul(pc, yi), pm.pmul(ps, xi)));
|
||||
pstore(py + PacketSize, psub(pcj.pmul(pc, yi1), pm.pmul(ps, xi1)));
|
||||
pstoreu(px, pm.pmadd(pc, xi, pcj.pmul(ps, yi)));
|
||||
pstoreu(px + PacketSize, pm.pmadd(pc, xi1, pcj.pmul(ps, yi1)));
|
||||
pstore(py, pcj.pmsub(pc, yi, pm.pmul(ps, xi)));
|
||||
pstore(py + PacketSize, pcj.pmsub(pc, yi1, pm.pmul(ps, xi1)));
|
||||
px += Peeling * PacketSize;
|
||||
py += Peeling * PacketSize;
|
||||
}
|
||||
if (alignedEnd != peelingEnd) {
|
||||
Packet xi = ploadu<Packet>(x + peelingEnd);
|
||||
Packet yi = pload<Packet>(y + peelingEnd);
|
||||
pstoreu(x + peelingEnd, padd(pm.pmul(pc, xi), pcj.pmul(ps, yi)));
|
||||
pstore(y + peelingEnd, psub(pcj.pmul(pc, yi), pm.pmul(ps, xi)));
|
||||
pstoreu(x + peelingEnd, pm.pmadd(pc, xi, pcj.pmul(ps, yi)));
|
||||
pstore(y + peelingEnd, pcj.pmsub(pc, yi, pm.pmul(ps, xi)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -381,8 +381,8 @@ struct apply_rotation_in_the_plane_selector<Scalar, OtherScalar, SizeAtCompileTi
|
||||
for (Index i = 0; i < size; i += PacketSize) {
|
||||
Packet xi = pload<Packet>(px);
|
||||
Packet yi = pload<Packet>(py);
|
||||
pstore(px, padd(pm.pmul(pc, xi), pcj.pmul(ps, yi)));
|
||||
pstore(py, psub(pcj.pmul(pc, yi), pm.pmul(ps, xi)));
|
||||
pstore(px, pm.pmadd(pc, xi, pcj.pmul(ps, yi)));
|
||||
pstore(py, pcj.pmsub(pc, yi, pm.pmul(ps, xi)));
|
||||
px += PacketSize;
|
||||
py += PacketSize;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user