Get rid of code duplication for conj_helper. For packets where LhsType=RhsType a single generic implementation suffices. For scalars, the generic implementation of pconj automatically forwards to numext::conj, so much of the existing specialization can be avoided. For mixed types we still need specializations.

This commit is contained in:
Rasmus Munk Larsen 2021-06-24 15:47:48 -07:00
parent 4ad30a73fc
commit 52a5f98212
9 changed files with 105 additions and 635 deletions

View File

@ -167,39 +167,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const P
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
}
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
{
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
@ -350,39 +317,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const
Packet1cd(_mm256_extractf128_pd(a.v,1))));
}
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
{
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)

View File

@ -153,39 +153,6 @@ EIGEN_STRONG_INLINE Packet4cf predux_half_dowto4<Packet8cf>(const Packet8cf& a)
return Packet4cf(res);
}
template<> struct conj_helper<Packet8cf, Packet8cf, false,true>
{
EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet8cf, Packet8cf, true,false>
{
EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet8cf, Packet8cf, true,true>
{
EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet8cf pmul(const Packet8cf& a, const Packet8cf& b) const
{
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet8cf,Packet16f)
template<> EIGEN_STRONG_INLINE Packet8cf pdiv<Packet8cf>(const Packet8cf& a, const Packet8cf& b)

View File

@ -206,45 +206,12 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst<Packet2cf>(prod);
}
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for AltiVec
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
Packet2cf res = pmul(a, pconj(b));
Packet4f s = pmul<Packet4f>(b.v, b.v);
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
}
@ -404,45 +371,12 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for AltiVec
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet1cd res = pmul(a,pconj(b));
Packet2d s = pmul<Packet2d>(b.v, b.v);
return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
}

View File

@ -11,19 +11,97 @@
#ifndef EIGEN_ARCH_CONJ_HELPER_H
#define EIGEN_ARCH_CONJ_HELPER_H
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> { \
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
{ return padd(c, pmul(x,y)); } \
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); } \
}; \
\
template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> { \
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
{ return padd(c, pmul(x,y)); } \
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); } \
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
template <> \
struct conj_helper<PACKET_REAL, PACKET_CPLX, false, false> { \
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, \
const PACKET_CPLX& y, \
const PACKET_CPLX& c) const { \
return padd(c, this->pmul(x, y)); \
} \
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, \
const PACKET_CPLX& y) const { \
return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); \
} \
}; \
\
template <> \
struct conj_helper<PACKET_CPLX, PACKET_REAL, false, false> { \
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, \
const PACKET_REAL& y, \
const PACKET_CPLX& c) const { \
return padd(c, this->pmul(x, y)); \
} \
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, \
const PACKET_REAL& y) const { \
return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); \
} \
};
#endif // EIGEN_ARCH_CONJ_HELPER_H
namespace Eigen {
namespace internal {
template<bool Conjugate> struct conj_if;
template<> struct conj_if<true> {
template<typename T>
inline T operator()(const T& x) const { return numext::conj(x); }
template<typename T>
inline T pconj(const T& x) const { return internal::pconj(x); }
};
template<> struct conj_if<false> {
template<typename T>
inline const T& operator()(const T& x) const { return x; }
template<typename T>
inline const T& pconj(const T& x) const { return x; }
};
// Generic implementation.
template<typename LhsType, typename RhsType, bool ConjLhs, bool ConjRhs>
struct conj_helper
{
typedef typename ScalarBinaryOpTraits<LhsType,RhsType>::ReturnType ResultType;
EIGEN_STRONG_INLINE ResultType pmadd(const LhsType& x, const RhsType& y, const ResultType& c) const
{ return Eigen::internal::pmadd(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y), c); }
EIGEN_STRONG_INLINE ResultType pmul(const LhsType& x, const RhsType& y) const
{ return Eigen::internal::pmul(conj_if<ConjLhs>().pconj(x), conj_if<ConjRhs>().pconj(y)); }
};
template<typename LhsType, typename RhsType>
struct conj_helper<LhsType, RhsType, true, true>
{
typedef typename ScalarBinaryOpTraits<LhsType,RhsType>::ReturnType ResultType;
EIGEN_STRONG_INLINE ResultType pmadd(const LhsType& x, const RhsType& y, const ResultType& c) const
{ return Eigen::internal::pmadd(pconj(x), pconj(y), c); }
// We save a conjuation by using the identity conj(a)*conj(b) = conj(a*b).
EIGEN_STRONG_INLINE ResultType pmul(const LhsType& x, const RhsType& y) const
{ return pconj(Eigen::internal::pmul(x, y)); }
};
// Generic implementation for mixed products of complex scalar types.
template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
{ return c + conj_if<Conj>().pconj(x) * y; }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
{ return conj_if<Conj>().pconj(x) * y; }
};
template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
{ return x * conj_if<Conj>().pconj(y); }
};
} // namespace internal
} // namespace Eigen
#endif // EIGEN_ARCH_CONJ_HELPER_H

View File

@ -305,42 +305,6 @@ EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a
(a.v[0] * a.v[3]) + (a.v[1] * a.v[2]));
}
template <>
struct conj_helper<Packet2cf, Packet2cf, false, true> {
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y,
const Packet2cf& c) const {
return padd(pmul(x, y), c);
}
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const {
return internal::pmul(a, pconj(b));
}
};
template <>
struct conj_helper<Packet2cf, Packet2cf, true, false> {
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y,
const Packet2cf& c) const {
return padd(pmul(x, y), c);
}
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const {
return internal::pmul(pconj(a), b);
}
};
template <>
struct conj_helper<Packet2cf, Packet2cf, true, true> {
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y,
const Packet2cf& c) const {
return padd(pmul(x, y), c);
}
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const {
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf, Packet4f)
template <>
@ -644,42 +608,6 @@ EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd&
return pfirst(a);
}
template <>
struct conj_helper<Packet1cd, Packet1cd, false, true> {
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y,
const Packet1cd& c) const {
return padd(pmul(x, y), c);
}
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const {
return internal::pmul(a, pconj(b));
}
};
template <>
struct conj_helper<Packet1cd, Packet1cd, true, false> {
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y,
const Packet1cd& c) const {
return padd(pmul(x, y), c);
}
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const {
return internal::pmul(pconj(a), b);
}
};
template <>
struct conj_helper<Packet1cd, Packet1cd, true, true> {
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y,
const Packet1cd& c) const {
return padd(pmul(x, y), c);
}
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const {
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd, Packet2d)
template <>

View File

@ -342,67 +342,13 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return s;
}
template<> struct conj_helper<Packet1cf,Packet1cf,false,true>
{
EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1cf& b) const
{ return internal::pmul(a, pconj(b)); }
};
template<> struct conj_helper<Packet1cf,Packet1cf,true,false>
{
EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1cf& b) const
{ return internal::pmul(pconj(a), b); }
};
template<> struct conj_helper<Packet1cf,Packet1cf,true,true>
{
EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1cf& b) const
{ return pconj(internal::pmul(a,b)); }
};
template<> struct conj_helper<Packet2cf,Packet2cf,false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{ return internal::pmul(a, pconj(b)); }
};
template<> struct conj_helper<Packet2cf,Packet2cf,true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{ return internal::pmul(pconj(a), b); }
};
template<> struct conj_helper<Packet2cf,Packet2cf,true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{ return pconj(internal::pmul(a,b)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf,Packet2f)
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
// TODO optimize it for NEON
Packet1cf res = conj_helper<Packet1cf, Packet1cf, false, true>().pmul(a,b);
Packet1cf res = pmul(a, pconj(b));
Packet2f s, rev_s;
// this computes the norm
@ -414,7 +360,7 @@ template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, con
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for NEON
Packet2cf res = conj_helper<Packet2cf, Packet2cf, false, true>().pmul(a,b);
Packet2cf res = pmul(a,pconj(b));
Packet4f s, rev_s;
// this computes the norm
@ -603,39 +549,12 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{ return internal::pmul(a, pconj(b)); }
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{ return internal::pmul(pconj(a), b); }
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{ return pconj(internal::pmul(a,b)); }
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for NEON
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet1cd res = pmul(a,pconj(b));
Packet2d s = pmul<Packet2d>(b.v, b.v);
Packet2d rev_s = preverse<Packet2d>(s);

View File

@ -165,74 +165,21 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
}
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return internal::pmul(a, pconj(b));
#else
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
#endif
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return internal::pmul(pconj(a), b);
#else
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
#endif
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return pconj(internal::pmul(a, b));
#else
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
#endif
}
};
return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
}
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for SSE3 and 4
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet2cf res = pmul(a, pconj(b));
__m128 s = _mm_mul_ps(b.v,b.v);
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,vec4f_swizzle1(s, 1, 0, 3, 2))));
}
EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
{
return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
}
//---------- double ----------
@ -348,66 +295,12 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
return pfirst(a);
}
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return internal::pmul(a, pconj(b));
#else
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
vec2d_swizzle1(b.v, 1, 0))));
#endif
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return internal::pmul(pconj(a), b);
#else
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
vec2d_swizzle1(b.v, 1, 0)), mask)));
#endif
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
#ifdef EIGEN_VECTORIZE_SSE3
return pconj(internal::pmul(a, b));
#else
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
vec2d_swizzle1(b.v, 1, 0))));
#endif
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for SSE3 and 4
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet1cd res = pmul(a,pconj(b));
__m128d s = _mm_mul_pd(b.v,b.v);
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
}

View File

@ -165,45 +165,12 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
{
return pfirst(a);
}
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
{
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for AltiVec
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet1cd res = pmul(a,pconj(b));
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
}
@ -337,39 +304,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return res;
}
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
@ -456,45 +390,12 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst<Packet2cf>(prod);
}
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return internal::pmul(a, pconj(b));
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return internal::pmul(pconj(a), b);
}
};
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(pmul(x,y),c); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
{
return pconj(internal::pmul(a, b));
}
};
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for AltiVec
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
Packet2cf res = pmul(a, pconj(b));
Packet4f s = pmul<Packet4f>(b.v, b.v);
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
}

View File

@ -39,90 +39,6 @@ template<typename Index,
typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
struct general_matrix_vector_product;
template<bool Conjugate> struct conj_if;
template<> struct conj_if<true> {
template<typename T>
inline T operator()(const T& x) const { return numext::conj(x); }
template<typename T>
inline T pconj(const T& x) const { return internal::pconj(x); }
};
template<> struct conj_if<false> {
template<typename T>
inline const T& operator()(const T& x) const { return x; }
template<typename T>
inline const T& pconj(const T& x) const { return x; }
};
// Generic implementation for custom complex types.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
struct conj_helper
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
{ return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
};
template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
};
template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
{ return conj_if<Conj>()(x)*y; }
};
template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
{ return x*conj_if<Conj>()(y); }
};
template<typename From,typename To> struct get_factor {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
};