diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index 78d1e5628..cbe20d50e 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -46,7 +46,7 @@ template struct ei_functor_traits > { enum { Cost = NumTraits::AddCost, - PacketAccess = ei_packet_traits::size>1 + PacketAccess = ei_packet_traits::HasAdd }; }; @@ -69,7 +69,7 @@ template struct ei_functor_traits > { enum { Cost = NumTraits::MulCost, - PacketAccess = ei_packet_traits::size>1 + PacketAccess = ei_packet_traits::HasMul }; }; @@ -92,7 +92,7 @@ template struct ei_functor_traits > { enum { Cost = NumTraits::AddCost, - PacketAccess = ei_packet_traits::size>1 + PacketAccess = ei_packet_traits::HasMin }; }; @@ -115,7 +115,7 @@ template struct ei_functor_traits > { enum { Cost = NumTraits::AddCost, - PacketAccess = ei_packet_traits::size>1 + PacketAccess = ei_packet_traits::HasMax }; }; @@ -158,7 +158,7 @@ template struct ei_functor_traits > { enum { Cost = NumTraits::AddCost, - PacketAccess = ei_packet_traits::size>1 + PacketAccess = ei_packet_traits::HasSub }; }; @@ -178,10 +178,7 @@ template struct ei_functor_traits > { enum { Cost = 2 * NumTraits::MulCost, - PacketAccess = ei_packet_traits::size>1 - #if (defined EIGEN_VECTORIZE) - && !NumTraits::IsInteger - #endif + PacketAccess = ei_packet_traits::HasDiv }; }; @@ -203,7 +200,7 @@ template struct ei_functor_traits > { enum { Cost = NumTraits::AddCost, - PacketAccess = int(ei_packet_traits::size)>1 }; + PacketAccess = ei_packet_traits::HasNegate }; }; /** \internal @@ -224,7 +221,7 @@ struct ei_functor_traits > { enum { Cost = NumTraits::AddCost, - PacketAccess = int(ei_packet_traits::size)>1 + PacketAccess = ei_packet_traits::HasAbs }; }; @@ -243,7 +240,7 @@ template struct ei_scalar_abs2_op { }; template struct ei_functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = int(ei_packet_traits::size)>1 }; }; +{ enum { Cost = NumTraits::MulCost, PacketAccess = ei_packet_traits::HasAbs2 }; }; /** \internal * \brief Template functor to compute the conjugate of a complex value @@ -254,14 +251,14 @@ template struct ei_scalar_conjugate_op { EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_conjugate_op) EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return ei_conj(a); } template - EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return a; } + EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a) const { return ei_pconj(a); } }; template struct ei_functor_traits > { enum { Cost = NumTraits::IsComplex ? NumTraits::AddCost : 0, - PacketAccess = int(ei_packet_traits::size)>1 + PacketAccess = ei_packet_traits::HasConj }; }; @@ -398,7 +395,7 @@ struct ei_scalar_multiple_op { }; template struct ei_functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = ei_packet_traits::size>1 }; }; +{ enum { Cost = NumTraits::MulCost, PacketAccess = ei_packet_traits::HasMul }; }; template struct ei_scalar_multiple2_op { @@ -425,7 +422,7 @@ struct ei_scalar_quotient1_impl { }; template struct ei_functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = ei_packet_traits::size>1 }; }; +{ enum { Cost = NumTraits::MulCost, PacketAccess = ei_packet_traits::HasMul }; }; template struct ei_scalar_quotient1_impl { @@ -472,6 +469,7 @@ struct ei_scalar_constant_op { }; template struct ei_functor_traits > +// FIXME replace this packet test by a safe one { enum { Cost = 1, PacketAccess = ei_packet_traits::size>1, IsRepeatable = true }; }; template struct ei_scalar_identity_op { @@ -543,7 +541,7 @@ struct ei_linspaced_op_impl // nested expressions). template struct ei_linspaced_op; template struct ei_functor_traits< ei_linspaced_op > -{ enum { Cost = 1, PacketAccess = ei_packet_traits::size>1, IsRepeatable = true }; }; +{ enum { Cost = 1, PacketAccess = ei_packet_traits::HasSetLinear, IsRepeatable = true }; }; template struct ei_linspaced_op { typedef typename ei_packet_traits::type PacketScalar; @@ -588,7 +586,7 @@ struct ei_scalar_add_op { }; template struct ei_functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = ei_packet_traits::size>1 }; }; +{ enum { Cost = NumTraits::AddCost, PacketAccess = ei_packet_traits::HasAdd }; }; /** \internal * \brief Template functor to compute the square root of a scalar @@ -676,7 +674,7 @@ struct ei_scalar_inverse_op { }; template struct ei_functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = int(ei_packet_traits::size)>1 }; }; +{ enum { Cost = NumTraits::MulCost, PacketAccess = ei_packet_traits::HasDiv }; }; /** \internal * \brief Template functor to compute the square of a scalar @@ -692,7 +690,7 @@ struct ei_scalar_square_op { }; template struct ei_functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = int(ei_packet_traits::size)>1 }; }; +{ enum { Cost = NumTraits::MulCost, PacketAccess = ei_packet_traits::HasMul }; }; /** \internal * \brief Template functor to compute the cube of a scalar @@ -708,7 +706,7 @@ struct ei_scalar_cube_op { }; template struct ei_functor_traits > -{ enum { Cost = 2*NumTraits::MulCost, PacketAccess = int(ei_packet_traits::size)>1 }; }; +{ enum { Cost = 2*NumTraits::MulCost, PacketAccess = ei_packet_traits::HasMul }; }; // default functor traits for STL functors: diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 643e12e34..6cd288c55 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -58,8 +58,11 @@ struct ei_default_packet_traits HasMul = 1, HasNegate = 1, HasAbs = 1, + HasAbs2 = 1, HasMin = 1, HasMax = 1, + HasConj = 1, + HasSetLinear = 1, HasDiv = 0, HasSqrt = 0, @@ -105,6 +108,10 @@ ei_psub(const Packet& a, template inline Packet ei_pnegate(const Packet& a) { return -a; } +/** \internal \returns conj(a) (coeff-wise) */ +template inline Packet +ei_pconj(const Packet& a) { return ei_conj(a); } + /** \internal \returns a * b (coeff-wise) */ template inline Packet ei_pmul(const Packet& a, diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index ab8bf8b84..3f7a04b7d 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -37,6 +37,18 @@ typedef __m128d Packet1cd; template<> struct ei_packet_traits > : ei_default_packet_traits { typedef Packet2cf type; enum {size=2}; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; }; template<> struct ei_unpacket_traits { typedef std::complex type; enum {size=2}; }; @@ -56,7 +68,11 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) { const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000)); return Packet2cf(_mm_xor_ps(a.v,mask)); - +} +template<> EIGEN_STRONG_INLINE Packet2cf ei_pconj(const Packet2cf& a) +{ + const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000)); + return Packet2cf(_mm_xor_ps(a.v,mask)); } template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul(const Packet2cf& a, const Packet2cf& b) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 29375bdae..8a3cdf679 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -62,6 +62,7 @@ template<> struct ei_packet_traits : ei_default_packet_traits { typedef Packet4f type; enum {size=4}; enum { + HasDiv = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasLog = 1, @@ -70,7 +71,12 @@ template<> struct ei_packet_traits : ei_default_packet_traits }; }; template<> struct ei_packet_traits : ei_default_packet_traits -{ typedef Packet2d type; enum {size=2}; }; +{ + typedef Packet2d type; enum {size=2}; + enum { + HasDiv = 1 + }; +}; template<> struct ei_packet_traits : ei_default_packet_traits { typedef Packet4i type; enum {size=4}; }; diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 4d9a09708..4a2ebb713 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -773,8 +773,8 @@ struct ei_gemm_pack_lhs // 4 5 6 7 16 17 18 19 25 28 // 8 9 10 11 20 21 22 23 26 29 // . . . . . . . . . . -template -struct ei_gemm_pack_rhs +template +struct ei_gemm_pack_rhs { typedef typename ei_packet_traits::type Packet; enum { PacketSize = ei_packet_traits::size }; @@ -782,6 +782,7 @@ struct ei_gemm_pack_rhs Index stride=0, Index offset=0) { ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + ei_conj_if::IsComplex && Conjugate> cj; bool hasAlpha = alpha != Scalar(1); Index packet_cols = (cols/nr) * nr; Index count = 0; @@ -796,19 +797,19 @@ struct ei_gemm_pack_rhs if (hasAlpha) for(Index k=0; k if (hasAlpha) for(Index k=0; k }; // this version is optimized for row major matrices -template -struct ei_gemm_pack_rhs +template +struct ei_gemm_pack_rhs { enum { PacketSize = ei_packet_traits::size }; void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Scalar alpha, Index depth, Index cols, Index stride=0, Index offset=0) { ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + ei_conj_if::IsComplex && Conjugate> cj; bool hasAlpha = alpha != Scalar(1); Index packet_cols = (cols/nr) * nr; Index count = 0; @@ -858,10 +860,10 @@ struct ei_gemm_pack_rhs for(Index k=0; k for(Index k=0; k const Scalar* b0 = &rhs[j2]; for(Index k=0; k lhs(_lhs,lhsStride); ei_const_blas_data_mapper rhs(_rhs,rhsStride); - if (ConjugateRhs) - alpha = ei_conj(alpha); - typedef typename ei_packet_traits::type PacketType; typedef ei_product_blocking_traits Blocking; @@ -83,9 +80,9 @@ static void run(Index rows, Index cols, Index depth, Index mc = std::min(rows,blocking.mc()); // cache block size along the M direction //Index nc = blocking.nc(); // cache block size along the N direction - ei_gemm_pack_rhs pack_rhs; - ei_gemm_pack_lhs pack_lhs; - ei_gebp_kernel > gebp; + ei_gemm_pack_lhs pack_lhs; + ei_gemm_pack_rhs pack_rhs; + ei_gebp_kernel gebp; #ifdef EIGEN_HAS_OPENMP if(info) diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 139ea73d2..cd0ed0ede 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -29,10 +29,15 @@ // implement and control fast level 2 and level 3 BLAS-like routines. // forward declarations -template + +// Provides scalar/packet-wise product and product with accumulation +// with optional conjugation of the arguments. +template struct ei_conj_helper; + +template > struct ei_gebp_kernel; -template +template struct ei_gemm_pack_rhs; template @@ -53,10 +58,6 @@ template struct ei_conj_helper; - template<> struct ei_conj_helper { template