Add packet sin and cos to Altivec/VSX and NEON

This commit is contained in:
Gael Guennebaud 2018-11-30 16:21:33 +01:00
parent 69ace742be
commit c785464430
4 changed files with 35 additions and 8 deletions

View File

@ -30,6 +30,18 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
return pexp_float(_x); return pexp_float(_x);
} }
// Specialization of psin for Packet4f: passes the packet straight through to
// psin_float and returns that result.
// NOTE(review): psin_float is defined outside this view; presumably it is the
// shared single-precision sine kernel - confirm.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psin<Packet4f>(const Packet4f& _x)
{
return psin_float(_x);
}
// Specialization of pcos for Packet4f: passes the packet straight through to
// pcos_float and returns that result.
// NOTE(review): pcos_float is defined outside this view; presumably it is the
// shared single-precision cosine kernel - confirm.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f pcos<Packet4f>(const Packet4f& _x)
{
return pcos_float(_x);
}
#ifndef EIGEN_COMP_CLANG #ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f prsqrt<Packet4f>(const Packet4f& x) Packet4f prsqrt<Packet4f>(const Packet4f& x)

View File

@ -146,8 +146,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasMin = 1, HasMin = 1,
HasMax = 1, HasMax = 1,
HasAbs = 1, HasAbs = 1,
HasSin = 0, HasSin = EIGEN_FAST_MATH,
HasCos = 0, HasCos = EIGEN_FAST_MATH,
HasLog = 1, HasLog = 1,
HasExp = 1, HasExp = 1,
#ifdef __VSX__ #ifdef __VSX__
@ -437,6 +437,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const
Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b)); Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));
return vec_nor(c,c); return vec_nor(c,c);
} }
// Specialization of pcmp_eq for Packet4i: applies vec_cmpeq to a and b and
// reinterpret_casts the comparison result to Packet4i.
// NOTE(review): presumably each lane of the result is an all-ones / all-zeros mask - confirm.
template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmpeq(a,b)); }
// Specialization of pand for Packet4f: returns vec_and(a, b).
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
// Specialization of pand for Packet4i: returns vec_and(a, b).
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }

View File

@ -15,15 +15,27 @@ namespace Eigen {
namespace internal { namespace internal {
// Specialization of pexp for Packet4f: forwards its argument unchanged to
// pexp_float and returns that result.
// NOTE(review): pexp_float is defined outside this view - confirm its contract there.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f pexp<Packet4f>(const Packet4f& _x) Packet4f pexp<Packet4f>(const Packet4f& x)
{ {
return pexp_float(_x); return pexp_float(x);
} }
// Specialization of plog for Packet4f: forwards its argument unchanged to
// plog_float and returns that result.
// NOTE(review): plog_float is defined outside this view - confirm its contract there.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f plog<Packet4f>(const Packet4f& _x) Packet4f plog<Packet4f>(const Packet4f& x)
{ {
return plog_float(_x); return plog_float(x);
}
// Specialization of psin for Packet4f: passes the packet straight through to
// psin_float and returns that result.
// NOTE(review): psin_float is defined outside this view; presumably it is the
// shared single-precision sine kernel - confirm.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f psin<Packet4f>(const Packet4f& x)
{
return psin_float(x);
}
// Specialization of pcos for Packet4f: passes the packet straight through to
// pcos_float and returns that result.
// NOTE(review): pcos_float is defined outside this view; presumably it is the
// shared single-precision cosine kernel - confirm.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f pcos<Packet4f>(const Packet4f& x)
{
return pcos_float(x);
} }
} // end namespace internal } // end namespace internal

View File

@ -111,8 +111,8 @@ template<> struct packet_traits<float> : default_packet_traits
HasDiv = 1, HasDiv = 1,
HasFloor = 1, HasFloor = 1,
// FIXME check the Has* // FIXME check the Has*
HasSin = 0, HasSin = EIGEN_FAST_MATH,
HasCos = 0, HasCos = EIGEN_FAST_MATH,
HasLog = 1, HasLog = 1,
HasExp = 1, HasExp = 1,
HasSqrt = 0 HasSqrt = 0
@ -268,6 +268,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4
// Specialization of pcmp_eq for Packet4f: compares a and b with vceqq_f32 and
// reinterprets the unsigned 32-bit result as a float packet via vreinterpretq_f32_u32.
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vceqq_f32(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vceqq_f32(a,b)); }
// Specialization of pcmp_lt_or_nan for Packet4f: computes vcgeq_f32(a,b), inverts
// every bit with vmvnq_u32, and reinterprets the result as a float packet. The
// returned mask is therefore the bitwise complement of the "a >= b" comparison.
// NOTE(review): presumably lanes holding a NaN fail the >= test and so end up set - confirm.
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vmvnq_u32(vcgeq_f32(a,b))); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return vreinterpretq_f32_u32(vmvnq_u32(vcgeq_f32(a,b))); }
// Specialization of pcmp_eq for Packet4i: compares a and b with vceqq_s32 and
// reinterprets the unsigned 32-bit result as a signed integer packet via vreinterpretq_s32_u32.
// NOTE(review): presumably each lane of the result is an all-ones / all-zeros mask - confirm.
template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return vreinterpretq_s32_u32(vceqq_s32(a,b)); }
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
{ {
const Packet4f cst_1 = pset1<Packet4f>(1.0f); const Packet4f cst_1 = pset1<Packet4f>(1.0f);