mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-13 01:43:13 +08:00
SSE/AVX Complex FMA
This commit is contained in:
parent
c21a80be3d
commit
648bce6cae
@ -455,6 +455,74 @@ EIGEN_STRONG_INLINE Packet4cf pexp<Packet4cf>(const Packet4cf& a) {
|
||||
return pexp_complex<Packet4cf>(a);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
// std::complex<float>
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
||||
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
||||
__m256 a_even = _mm256_moveldup_ps(a.v);
|
||||
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmaddsub_ps(a_odd, b_swap, c.v));
|
||||
return Packet4cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
||||
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
||||
__m256 a_even = _mm256_moveldup_ps(a.v);
|
||||
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmsubadd_ps(a_odd, b_swap, c.v));
|
||||
return Packet4cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pnmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
||||
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
||||
__m256 a_even = _mm256_moveldup_ps(a.v);
|
||||
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmaddsub_ps(a_even, b.v, c.v));
|
||||
return Packet4cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pnmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
|
||||
__m256 a_odd = _mm256_movehdup_ps(a.v);
|
||||
__m256 a_even = _mm256_moveldup_ps(a.v);
|
||||
__m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmsubadd_ps(a_even, b.v, c.v));
|
||||
return Packet4cf(result);
|
||||
}
|
||||
// std::complex<double>
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
||||
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
||||
__m256d a_even = _mm256_movedup_pd(a.v);
|
||||
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
||||
__m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmaddsub_pd(a_odd, b_swap, c.v));
|
||||
return Packet2cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
||||
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
||||
__m256d a_even = _mm256_movedup_pd(a.v);
|
||||
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
||||
__m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmsubadd_pd(a_odd, b_swap, c.v));
|
||||
return Packet2cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pnmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
||||
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
||||
__m256d a_even = _mm256_movedup_pd(a.v);
|
||||
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
||||
__m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmaddsub_pd(a_even, b.v, c.v));
|
||||
return Packet2cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pnmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
|
||||
__m256d a_odd = _mm256_permute_pd(a.v, 0xF);
|
||||
__m256d a_even = _mm256_movedup_pd(a.v);
|
||||
__m256d b_swap = _mm256_permute_pd(b.v, 0x5);
|
||||
__m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmsubadd_pd(a_even, b.v, c.v));
|
||||
return Packet2cd(result);
|
||||
}
|
||||
#endif
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -445,6 +445,74 @@ EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
|
||||
return pexp_complex<Packet2cf>(a);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
// std::complex<float>
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
__m128 a_odd = _mm_movehdup_ps(a.v);
|
||||
__m128 a_even = _mm_moveldup_ps(a.v);
|
||||
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m128 result = _mm_fmaddsub_ps(a_even, b.v, _mm_fmaddsub_ps(a_odd, b_swap, c.v));
|
||||
return Packet2cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmsub(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
__m128 a_odd = _mm_movehdup_ps(a.v);
|
||||
__m128 a_even = _mm_moveldup_ps(a.v);
|
||||
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m128 result = _mm_fmaddsub_ps(a_even, b.v, _mm_fmsubadd_ps(a_odd, b_swap, c.v));
|
||||
return Packet2cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pnmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
__m128 a_odd = _mm_movehdup_ps(a.v);
|
||||
__m128 a_even = _mm_moveldup_ps(a.v);
|
||||
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m128 result = _mm_fmaddsub_ps(a_odd, b_swap, _mm_fmaddsub_ps(a_even, b.v, c.v));
|
||||
return Packet2cf(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pnmsub(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
__m128 a_odd = _mm_movehdup_ps(a.v);
|
||||
__m128 a_even = _mm_moveldup_ps(a.v);
|
||||
__m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
__m128 result = _mm_fmaddsub_ps(a_odd, b_swap, _mm_fmsubadd_ps(a_even, b.v, c.v));
|
||||
return Packet2cf(result);
|
||||
}
|
||||
// std::complex<double>
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
__m128d a_odd = _mm_permute_pd(a.v, 0xF);
|
||||
__m128d a_even = _mm_movedup_pd(a.v);
|
||||
__m128d b_swap = _mm_permute_pd(b.v, 0x5);
|
||||
__m128d result = _mm_fmaddsub_pd(a_even, b.v, _mm_fmaddsub_pd(a_odd, b_swap, c.v));
|
||||
return Packet1cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmsub(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
__m128d a_odd = _mm_permute_pd(a.v, 0xF);
|
||||
__m128d a_even = _mm_movedup_pd(a.v);
|
||||
__m128d b_swap = _mm_permute_pd(b.v, 0x5);
|
||||
__m128d result = _mm_fmaddsub_pd(a_even, b.v, _mm_fmsubadd_pd(a_odd, b_swap, c.v));
|
||||
return Packet1cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pnmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
__m128d a_odd = _mm_permute_pd(a.v, 0xF);
|
||||
__m128d a_even = _mm_movedup_pd(a.v);
|
||||
__m128d b_swap = _mm_permute_pd(b.v, 0x5);
|
||||
__m128d result = _mm_fmaddsub_pd(a_odd, b_swap, _mm_fmaddsub_pd(a_even, b.v, c.v));
|
||||
return Packet1cd(result);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pnmsub(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
__m128d a_odd = _mm_permute_pd(a.v, 0xF);
|
||||
__m128d a_even = _mm_movedup_pd(a.v);
|
||||
__m128d b_swap = _mm_permute_pd(b.v, 0x5);
|
||||
__m128d result = _mm_fmaddsub_pd(a_odd, b_swap, _mm_fmsubadd_pd(a_even, b.v, c.v));
|
||||
return Packet1cd(result);
|
||||
}
|
||||
#endif
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user