From ce5669dbf9d3ed3502531a73b43184859d5881d2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 26 Mar 2009 12:50:24 +0000 Subject: [PATCH] * enable vectorization of sin, cos, etc. by default with an option to disable them (-DEIGEN_FAST_MATH=0) * add a specialization of MatrixBase::operator*(RealScalar) for fast "matrix of complex" times scalar products (even more useful for autodiff scalar types) --- Eigen/src/Array/CwiseOperators.h | 4 ++-- Eigen/src/Array/Functors.h | 28 +++++++++++++++++++---- Eigen/src/Core/Functors.h | 12 ++++++++++ Eigen/src/Core/MatrixBase.h | 10 ++++++++ Eigen/src/Core/util/ForwardDeclarations.h | 2 ++ Eigen/src/Core/util/Macros.h | 17 ++++++++++---- Eigen/src/Core/util/Meta.h | 8 +++++++ Eigen/src/Core/util/XprHelper.h | 22 ++++++++++++++++++ 8 files changed, 92 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Array/CwiseOperators.h b/Eigen/src/Array/CwiseOperators.h index 4b6346daa..9fcfbc9c0 100644 --- a/Eigen/src/Array/CwiseOperators.h +++ b/Eigen/src/Array/CwiseOperators.h @@ -82,7 +82,7 @@ Cwise::log() const * Example: \include Cwise_cos.cpp * Output: \verbinclude Cwise_cos.out * - * \sa sin(), exp() + * \sa sin(), exp(), EIGEN_FAST_MATH */ template inline const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_cos_op) @@ -99,7 +99,7 @@ Cwise::cos() const * Example: \include Cwise_sin.cpp * Output: \verbinclude Cwise_sin.out * - * \sa cos(), exp() + * \sa cos(), exp(), EIGEN_FAST_MATH */ template inline const EIGEN_CWISE_UNOP_RETURN_TYPE(ei_scalar_sin_op) diff --git a/Eigen/src/Array/Functors.h b/Eigen/src/Array/Functors.h index 0aae7fd2c..9759ebf2a 100644 --- a/Eigen/src/Array/Functors.h +++ b/Eigen/src/Array/Functors.h @@ -73,10 +73,12 @@ struct ei_functor_traits > */ template struct ei_scalar_exp_op EIGEN_EMPTY_STRUCT { inline const Scalar operator() (const Scalar& a) const { return ei_exp(a); } + typedef typename ei_packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return ei_pexp(a); } }; template 
struct ei_functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; +{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = ei_packet_traits::HasExp }; }; /** \internal * @@ -88,10 +90,12 @@ struct ei_functor_traits > */ template struct ei_scalar_log_op EIGEN_EMPTY_STRUCT { inline const Scalar operator() (const Scalar& a) const { return ei_log(a); } + typedef typename ei_packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return ei_plog(a); } }; template struct ei_functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; +{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = ei_packet_traits::HasLog }; }; /** \internal * @@ -102,11 +106,18 @@ struct ei_functor_traits > * \sa class CwiseUnaryOp, Cwise::cos() */ template struct ei_scalar_cos_op EIGEN_EMPTY_STRUCT { - inline const Scalar operator() (const Scalar& a) const { return ei_cos(a); } + inline Scalar operator() (const Scalar& a) const { return ei_cos(a); } + typedef typename ei_packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return ei_pcos(a); } }; template struct ei_functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; +{ + enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = ei_packet_traits::HasCos && EIGEN_FAST_MATH + }; +}; /** \internal * @@ -118,10 +129,17 @@ struct ei_functor_traits > */ template struct ei_scalar_sin_op EIGEN_EMPTY_STRUCT { inline const Scalar operator() (const Scalar& a) const { return ei_sin(a); } + typedef typename ei_packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return ei_psin(a); } }; template struct ei_functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; +{ + enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = ei_packet_traits::HasSin && EIGEN_FAST_MATH + }; +}; /** \internal * diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index 
f411a254b..38371cf2a 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -327,6 +327,18 @@ template struct ei_functor_traits > { enum { Cost = NumTraits::MulCost, PacketAccess = ei_packet_traits::size>1 }; }; +template +struct ei_scalar_multiple2_op { + typedef typename ei_scalar_product_traits::ReturnType result_type; + EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const ei_scalar_multiple2_op& other) : m_other(other.m_other) { } + EIGEN_STRONG_INLINE ei_scalar_multiple2_op(const Scalar2& other) : m_other(other) { } + EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } + const Scalar2 m_other; +}; +template +struct ei_functor_traits > +{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; + template struct ei_scalar_quotient1_impl { typedef typename ei_packet_traits::type PacketScalar; diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 72fa0edb7..3db257361 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -53,10 +53,17 @@ * */ template class MatrixBase +#ifndef EIGEN_PARSED_BY_DOXYGEN + : public ei_special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real> +#endif // not EIGEN_PARSED_BY_DOXYGEN { public: #ifndef EIGEN_PARSED_BY_DOXYGEN + using ei_special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real>::operator*; + class InnerIterator; typedef typename ei_traits::Scalar Scalar; @@ -324,6 +331,9 @@ template class MatrixBase Derived& operator/=(const Scalar& other); const ScalarMultipleReturnType operator*(const Scalar& scalar) const; + #ifdef EIGEN_PARSED_BY_DOXYGEN + const ScalarMultipleReturnType operator*(const RealScalar& scalar) const; + #endif const CwiseUnaryOp::Scalar>, Derived> operator/(const Scalar& scalar) const; diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index ae079a29a..59205ce2e 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ 
b/Eigen/src/Core/util/ForwardDeclarations.h @@ -90,6 +90,8 @@ template struct ei_scalar_add_op; template struct ei_scalar_constant_op; template struct ei_scalar_identity_op; +template struct ei_scalar_multiple2_op; + struct IOFormat; template diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 69fa1db18..43c66d980 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -60,15 +60,15 @@ #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor #endif -/** \internal Defines the maximal loop size to enable meta unrolling of loops. - * Note that the value here is expressed in Eigen's own notion of "number of FLOPS", - * it does not correspond to the number of iterations or the number of instructions +/** Defines the maximal loop size to enable meta unrolling of loops. + * Note that the value here is expressed in Eigen's own notion of "number of FLOPS", + * it does not correspond to the number of iterations or the number of instructions */ #ifndef EIGEN_UNROLLING_LIMIT #define EIGEN_UNROLLING_LIMIT 100 #endif -/** \internal Define the maximal size in Bytes of blocks fitting in CPU cache. +/** Defines the maximal size in Bytes of blocks fitting in CPU cache. * The current value is set to generate blocks of 256x256 for float * * Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes @@ -82,6 +82,15 @@ #error EIGEN_TUNE_FOR_L2_CACHE_SIZE is now called EIGEN_TUNE_FOR_CPU_CACHE_SIZE. #endif +/** Allows disabling some optimizations which might affect the accuracy of the result. + * Such optimizations are enabled by default; set EIGEN_FAST_MATH to 0 to disable them. + * They currently include: + * - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled. 
+ */ +#ifndef EIGEN_FAST_MATH +#define EIGEN_FAST_MATH 1 +#endif + #define USING_PART_OF_NAMESPACE_EIGEN \ EIGEN_USING_MATRIX_TYPEDEFS \ using Eigen::Matrix; \ diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index c65c52ef4..120cde3e0 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -64,6 +64,14 @@ template struct ei_cleantype { typedef typename ei_cleant template struct ei_cleantype { typedef typename ei_cleantype::type type; }; template struct ei_cleantype { typedef typename ei_cleantype::type type; }; +/** \internal Allows enabling/disabling an overload + * according to a compile-time condition. + */ +template struct ei_enable_if; + +template struct ei_enable_if +{ typedef T type; }; + /** \internal * Convenient struct to get the result type of a unary or binary functor. * diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 60d686949..d78e3ea8c 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -198,6 +198,28 @@ template struct ei_are_flags_consistent }; }; +/** \internal Helper base class that adds scalar-multiple operator + * overloads for complex types */ +template::ret > +struct ei_special_scalar_op_base +{ + // dummy operator* so that the + // "using ei_special_scalar_op_base::operator*" compiles + void operator*() const; +}; + +template +struct ei_special_scalar_op_base +{ + const CwiseUnaryOp, Derived> + operator*(const OtherScalar& scalar) const + { + return CwiseUnaryOp, Derived> + (*static_cast(this), ei_scalar_multiple2_op(scalar)); + } +}; + /** \internal Gives the type of a sub-matrix or sub-vector of a matrix of type \a ExpressionType and size \a Size * TODO: could be a good idea to define a big ReturnType struct ?? */