diff --git a/Eigen/Core b/Eigen/Core index 25cb87930..1f3a6504e 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -371,6 +371,7 @@ using std::ptrdiff_t; #include "src/Core/arch/Default/Settings.h" +#include "src/Core/functors/TernaryFunctors.h" #include "src/Core/functors/BinaryFunctors.h" #include "src/Core/functors/UnaryFunctors.h" #include "src/Core/functors/NullaryFunctors.h" @@ -403,6 +404,7 @@ using std::ptrdiff_t; #include "src/Core/PlainObjectBase.h" #include "src/Core/Matrix.h" #include "src/Core/Array.h" +#include "src/Core/CwiseTernaryOp.h" #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseNullaryOp.h" diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index ea93eb303..216a6d51d 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -32,6 +32,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/Eigenvalues/Tridiagonalization.h" #include "src/Eigenvalues/RealSchur.h" #include "src/Eigenvalues/EigenSolver.h" diff --git a/Eigen/SVD b/Eigen/SVD index b353f3f54..565d9c90d 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -31,6 +31,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index c0af4aa9d..7c2e0de16 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -149,7 +149,7 @@ class Array #if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC - Array(Array&& other) + Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) : Base(std::move(other)) { Base::_check_template_params(); @@ -157,7 +157,7 @@ class Array Base::_set_noalias(other); } EIGEN_DEVICE_FUNC - Array& operator=(Array&& other) + Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { other.swap(*this); return *this; diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 3d9c37bf6..62851a0c2 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -176,7 +176,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -189,7 +189,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator+=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 4b914ac0c..1df717bac 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -116,9 +116,9 @@ private: : 1, UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), + && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) + && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) }; public: @@ -687,7 +687,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX template 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) { - call_dense_assignment_loop(dst, src, internal::assign_op()); + call_dense_assignment_loop(dst, src, internal::assign_op()); } /*************************************************************************** @@ -722,13 +722,13 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) { - call_assignment(dst, src, internal::assign_op()); + call_assignment(dst, src, internal::assign_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) { - call_assignment(dst, src, internal::assign_op()); + call_assignment(dst, src, internal::assign_op()); } // Deal with "assume-aliasing" @@ -787,7 +787,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias(Dst& dst, const Src& src) { - call_assignment_no_alias(dst, src, internal::assign_op()); + call_assignment_no_alias(dst, src, internal::assign_op()); } template @@ -809,7 +809,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) { - call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); } // forward declaration @@ -838,7 +838,7 @@ template< typename DstXprType, typename SrcXprType, typename Functor, typename S struct Assignment { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); src.evalTo(dst); diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index 2abc6605c..787743b8f 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -80,8 +80,11 @@ struct CommaInitializer EIGEN_DEVICE_FUNC CommaInitializer& operator,(const DenseBase& other) { - if(other.cols()==0 || other.rows()==0) + if(other.rows()==0) + { + m_col += other.cols(); return *this; + } if (m_col==m_xpr.cols()) { m_row+=m_currentBlockRows; @@ -90,7 +93,7 @@ struct CommaInitializer eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() && "Too many rows passed to comma initializer (operator<<)"); } - eigen_assert(m_col struct storage_kind_to_shape { typedef Transp // We currently distinguish the following kind of evaluators: // - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) // - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - ternary_evaluator for expression taking three arguments (CwiseTernaryOp) // - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. 
// - mapbase_evaluator for Map, Block, Ref // - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) +template< typename T, + typename Arg1Kind = typename evaluator_traits::Kind, + typename Arg2Kind = typename evaluator_traits::Kind, + typename Arg3Kind = typename evaluator_traits::Kind, + typename Arg1Scalar = typename traits::Scalar, + typename Arg2Scalar = typename traits::Scalar, + typename Arg3Scalar = typename traits::Scalar> struct ternary_evaluator; + template< typename T, typename LhsKind = typename evaluator_traits::Kind, typename RhsKind = typename evaluator_traits::Kind, @@ -442,6 +451,96 @@ protected: evaluator m_argImpl; }; +// -------------------- CwiseTernaryOp -------------------- + +// this is a ternary expression +template +struct evaluator > + : public ternary_evaluator > +{ + typedef CwiseTernaryOp XprType; + typedef ternary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct ternary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseTernaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + Arg1Flags = evaluator::Flags, + Arg2Flags = evaluator::Flags, + Arg3Flags = evaluator::Flags, + SameType = is_same::value && is_same::value, + StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit), + Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & ( + HereditaryBits + | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? 
PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN( + EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment), + evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_arg1Impl.template packet(row, col), + m_arg2Impl.template packet(row, col), + m_arg3Impl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + +protected: + const TernaryOp m_functor; + evaluator m_arg1Impl; + evaluator m_arg2Impl; + evaluator m_arg3Impl; +}; + // -------------------- CwiseBinaryOp -------------------- // this is a binary expression diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 39820fd7d..aa3297354 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -160,7 +160,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -173,7 +173,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/CwiseTernaryOp.h b/Eigen/src/Core/CwiseTernaryOp.h new file mode 100644 index 000000000..9f3576fec --- /dev/null +++ b/Eigen/src/Core/CwiseTernaryOp.h @@ -0,0 +1,197 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+
+#ifndef EIGEN_CWISE_TERNARY_OP_H
+#define EIGEN_CWISE_TERNARY_OP_H
+
+namespace Eigen {
+
+namespace internal {
+template
+struct traits > {
+  // we must not inherit from traits since it has
+  // the potential to cause problems with MSVC
+  typedef typename remove_all::type Ancestor;
+  typedef typename traits::XprKind XprKind;
+  enum {
+    RowsAtCompileTime = traits::RowsAtCompileTime,
+    ColsAtCompileTime = traits::ColsAtCompileTime,
+    MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = traits::MaxColsAtCompileTime
+  };
+
+  // even though we require Arg1, Arg2, and Arg3 to have the same scalar type
+  // (see CwiseTernaryOp constructor),
+  // we still want to handle the case when the result type is different.
+  typedef typename result_of::type Scalar;
+
+  typedef typename internal::traits::StorageKind StorageKind;
+  typedef typename internal::traits::StorageIndex StorageIndex;
+
+  typedef typename Arg1::Nested Arg1Nested;
+  typedef typename Arg2::Nested Arg2Nested;
+  typedef typename Arg3::Nested Arg3Nested;
+  typedef typename remove_reference::type _Arg1Nested;
+  typedef typename remove_reference::type _Arg2Nested;
+  typedef typename remove_reference::type _Arg3Nested;
+  enum { Flags = _Arg1Nested::Flags & RowMajorBit };
+};
+}  // end namespace internal
+
+template
+class CwiseTernaryOpImpl;
+
+/** \class CwiseTernaryOp
+ * \ingroup Core_Module
+ *
+ * \brief Generic expression where a coefficient-wise ternary operator is
+ * applied to three expressions
+ *
+ * \tparam TernaryOp template functor implementing the operator
+ * \tparam Arg1Type the type of the first argument
+ * \tparam Arg2Type the type of the second argument
+ * \tparam Arg3Type the type of the third argument
+ *
+ * This class represents an expression where a coefficient-wise ternary
+ * operator is applied to three expressions.
+ * It is the return type of ternary operators, by which we mean only those
+ * ternary operators where
+ * all three arguments are Eigen expressions.
+ * For example, the return type of betainc(matrix1, matrix2, matrix3) is a
+ * CwiseTernaryOp.
+ *
+ * Most of the time, this is the only way that it is used, so you typically
+ * don't have to name
+ * CwiseTernaryOp types explicitly.
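A minimal usage sketch of the above (an editorial illustration, not part of the patch; it assumes the patch is applied and, per the note on betainc() in GlobalFunctions.h below, a C++11 build with float or double scalars):

```cpp
#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXf a = Eigen::ArrayXf::Constant(4, 2.0f);
  Eigen::ArrayXf b = Eigen::ArrayXf::Constant(4, 3.0f);
  Eigen::ArrayXf x = Eigen::ArrayXf::LinSpaced(4, 0.1f, 0.9f);

  // betainc() returns a CwiseTernaryOp expression; it is evaluated
  // lazily on assignment, so the expression type is rarely spelled out.
  Eigen::ArrayXf y = Eigen::betainc(a, b, x);
  std::cout << y.transpose() << std::endl;
}
```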
+ *
+ * \sa MatrixBase::ternaryExpr(const MatrixBase &, const
+ * MatrixBase &, const CustomTernaryOp &) const, class CwiseBinaryOp,
+ * class CwiseUnaryOp, class CwiseNullaryOp
+ */
+template
+class CwiseTernaryOp : public CwiseTernaryOpImpl<
+                           TernaryOp, Arg1Type, Arg2Type, Arg3Type,
+                           typename internal::traits::StorageKind>,
+                       internal::no_assignment_operator
+{
+ public:
+  typedef typename internal::remove_all::type Arg1;
+  typedef typename internal::remove_all::type Arg2;
+  typedef typename internal::remove_all::type Arg3;
+
+  typedef typename CwiseTernaryOpImpl<
+      TernaryOp, Arg1Type, Arg2Type, Arg3Type,
+      typename internal::traits::StorageKind>::Base Base;
+  EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp)
+
+  typedef typename internal::ref_selector::type Arg1Nested;
+  typedef typename internal::ref_selector::type Arg2Nested;
+  typedef typename internal::ref_selector::type Arg3Nested;
+  typedef typename internal::remove_reference::type _Arg1Nested;
+  typedef typename internal::remove_reference::type _Arg2Nested;
+  typedef typename internal::remove_reference::type _Arg3Nested;
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
+                                     const Arg3& a3,
+                                     const TernaryOp& func = TernaryOp())
+      : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {
+    // require the sizes to match
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
+
+    // The storage kinds must match
+    EIGEN_STATIC_ASSERT((internal::is_same<
+                            typename internal::traits::StorageKind,
+                            typename internal::traits::StorageKind>::value),
+                        STORAGE_KIND_MUST_MATCH)
+    EIGEN_STATIC_ASSERT((internal::is_same<
+                            typename internal::traits::StorageKind,
+                            typename internal::traits::StorageKind>::value),
+                        STORAGE_KIND_MUST_MATCH)
+
+    eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
+                 a1.rows() == a3.rows() && a1.cols() == a3.cols());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE Index rows() const {
+    // return the fixed size type if available to enable compile time
+    // optimizations
+    if (internal::traits::type>::
+                RowsAtCompileTime == Dynamic &&
+        internal::traits::type>::
+                RowsAtCompileTime == Dynamic)
+      return m_arg3.rows();
+    else if (internal::traits::type>::
+                 RowsAtCompileTime == Dynamic &&
+             internal::traits::type>::
+                 RowsAtCompileTime == Dynamic)
+      return m_arg2.rows();
+    else
+      return m_arg1.rows();
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE Index cols() const {
+    // return the fixed size type if available to enable compile time
+    // optimizations
+    if (internal::traits::type>::
+                ColsAtCompileTime == Dynamic &&
+        internal::traits::type>::
+                ColsAtCompileTime == Dynamic)
+      return m_arg3.cols();
+    else if (internal::traits::type>::
+                 ColsAtCompileTime == Dynamic &&
+             internal::traits::type>::
+                 ColsAtCompileTime == Dynamic)
+      return m_arg2.cols();
+    else
+      return m_arg1.cols();
+  }
+
+  /** \returns the first argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg1Nested& arg1() const { return m_arg1; }
+  /** \returns the second argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg2Nested& arg2() const { return m_arg2; }
+  /** \returns the third argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg3Nested& arg3() const { return m_arg3; }
+  /** \returns the functor representing the ternary operation */
+  EIGEN_DEVICE_FUNC
+  const TernaryOp& functor() const { return m_functor; }
+
+ protected:
+  Arg1Nested m_arg1;
+  Arg2Nested m_arg2;
+  Arg3Nested m_arg3;
+  const TernaryOp m_functor;
+};
+
+//
Generic API dispatcher +template +class CwiseTernaryOpImpl + : public internal::generic_xpr_base< + CwiseTernaryOp >::type { + public: + typedef typename internal::generic_xpr_base< + CwiseTernaryOp >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_TERNARY_OP_H diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 5a9e3abd4..d6f89bced 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -71,18 +71,17 @@ class DiagonalBase : public EigenBase return InverseReturnType(diagonal().cwiseInverse()); } - typedef DiagonalWrapper, const DiagonalVectorType> > ScalarMultipleReturnType; EIGEN_DEVICE_FUNC - inline const ScalarMultipleReturnType + inline const DiagonalWrapper operator*(const Scalar& scalar) const { - return ScalarMultipleReturnType(diagonal() * scalar); + return DiagonalWrapper(diagonal() * scalar); } EIGEN_DEVICE_FUNC - friend inline const ScalarMultipleReturnType + friend inline const DiagonalWrapper operator*(const Scalar& scalar, const DiagonalBase& other) { - return ScalarMultipleReturnType(other.diagonal() * scalar); + return DiagonalWrapper(scalar * other.diagonal()); } }; @@ -320,16 +319,16 @@ template<> struct AssignmentKind { typedef Diagonal2De template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.setZero(); dst.diagonal() = src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index f3c869635..1d7f2262e 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -28,22 +28,24 @@ template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.template binaryExpr(b).sum(); } }; template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.transpose().template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.transpose().template binaryExpr(b).sum(); } }; @@ -62,7 +64,7 @@ struct dot_nocheck template template EIGEN_DEVICE_FUNC -typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType +typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { 
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index ba8e09674..f76995af9 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -138,7 +138,7 @@ template
 template
 Derived& DenseBase::operator+=(const EigenBase &other)
 {
-  call_assignment(derived(), other.derived(), internal::add_assign_op());
+  call_assignment(derived(), other.derived(), internal::add_assign_op());
   return derived();
 }
@@ -146,7 +146,7 @@ template
 template
 Derived& DenseBase::operator-=(const EigenBase &other)
 {
-  call_assignment(derived(), other.derived(), internal::sub_assign_op());
+  call_assignment(derived(), other.derived(), internal::sub_assign_op());
   return derived();
 }
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 82fabbb70..76a75dee1 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -83,6 +83,7 @@ struct default_packet_traits
     HasErfc = 0,
     HasIGamma = 0,
     HasIGammac = 0,
+    HasBetaInc = 0,
 
     HasRound = 0,
     HasFloor = 0,
@@ -466,6 +467,10 @@ Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return
 template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }
 
+/** \internal \returns the regularized incomplete beta function betainc(\a a, \a b, \a x) */
+template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); }
+
 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
 ***************************************************************************/
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index 9c97ccb0e..b9c3ec25b 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -89,14 +89,32 @@ namespace Eigen
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
 
   /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
+   *
+   * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
    *
    * \sa ArrayBase::pow()
+   *
+   * \relates ArrayBase
    */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+  template
+  inline const CwiseBinaryOp,Derived,Constant >
+  pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent);
+#else
+  template
+  inline typename internal::enable_if< !(internal::is_same::value)
+                                       && ScalarBinaryOpTraits >::Defined,
+                                       const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type
+  pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) {
+    return x.derived().pow(exponent);
+  }
+
   template
-  inline const Eigen::CwiseUnaryOp, const Derived>
+  inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow)
   pow(const Eigen::ArrayBase& x, const typename Derived::Scalar& exponent) {
     return x.derived().pow(exponent);
   }
+#endif
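An editorial usage sketch of the generalized pow() overloads above, together with the scalar-base overloads rewritten in the next hunk (assumes the patch is applied; not part of the diff):

```cpp
#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(3, 1.0, 3.0);

  // array ^ same-type scalar: always available.
  Eigen::ArrayXd p = Eigen::pow(a, 2.0);

  // array ^ scalar of a different type compiles only when
  // ScalarBinaryOpTraits<double, OtherType, scalar_pow_op<...>> is defined;
  // that is what the enable_if guard above checks.

  // scalar ^ array, declared in the following hunk.
  Eigen::ArrayXd q = Eigen::pow(2.0, a);

  std::cout << p.transpose() << "\n" << q.transpose() << std::endl;
}
```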
 
   /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
   *
@@ -106,12 +124,14 @@ namespace Eigen
   * Output: \verbinclude Cwise_array_power_array.out
   *
   * \sa ArrayBase::pow()
+  *
+  * \relates ArrayBase
   */
  template
- inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>
+ inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>
  pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents)
  {
-   return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>(
+   return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>(
      x.derived(),
      exponents.derived()
    );
@@ -120,36 +140,39 @@ namespace Eigen
   /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
    *
    * This function computes the coefficient-wise power between a scalar and an array of exponents.
-   * Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same.
+   *
+   * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
    *
    * Example: \include Cwise_scalar_power_array.cpp
    * Output: \verbinclude Cwise_scalar_power_array.out
    *
    * \sa ArrayBase::pow()
+   *
+   * \relates ArrayBase
    */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+  template
+  inline const CwiseBinaryOp,Constant,Derived>
+  pow(const Scalar& x, const Eigen::ArrayBase& exponents);
+#else
+  template
+  inline typename internal::enable_if< !(internal::is_same::value)
+                                       && ScalarBinaryOpTraits >::Defined,
+                                       const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type
+  pow(const Scalar& x, const Eigen::ArrayBase& exponents)
+  {
+    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)(
+      typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
+  }
+
   template
-  inline const Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived>
-  pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents)
+  inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)
+  pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents)
   {
-    typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x);
-    return Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived>(
-      constant_x,
-      exponents.derived()
-    );
-  }
-
-  /**
-   * \brief Component-wise division of a scalar by array elements.
-   **/
-  template
-  inline const Eigen::CwiseUnaryOp, const Derived>
-  operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a)
-  {
-    return Eigen::CwiseUnaryOp, const Derived>(
-      a.derived(),
-      Eigen::internal::scalar_inverse_mult_op(s)
-    );
+    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)(
+      typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
   }
+#endif
 
   /** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays.
    *
@@ -213,6 +236,28 @@ namespace Eigen
     );
   }
 
+  /** \cpp11 \returns an expression of the coefficient-wise betainc(\a a, \a b, \a x) to the given arrays.
+   *
+   * This function computes the regularized incomplete beta function (integral).
+   *
+   * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+   * or float/double in non c++11 mode, the user has to provide implementations of betainc(T,T,T) for any scalar
+   * type T to be supported.
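The note above leaves the extension mechanism implicit. An editorial sketch of one plausible route, based on the betainc_impl dispatch struct this patch adds in SpecialFunctions.h (MyScalar and my_betainc() are hypothetical placeholders):

```cpp
// Specializing the internal implementation struct routes
// numext::betainc(a, b, x) for a custom scalar type to user code.
namespace Eigen {
namespace internal {

template <>
struct betainc_impl<MyScalar> {
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE MyScalar run(MyScalar a, MyScalar b, MyScalar x) {
    return my_betainc(a, b, x);  // user-provided scalar implementation
  }
};

}  // namespace internal
}  // namespace Eigen
```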
+ * + * \sa Eigen::betainc(), Eigen::lgamma() + */ + template + inline const Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived> + betainc(const Eigen::ArrayBase& a, const Eigen::ArrayBase& b, const Eigen::ArrayBase& x) + { + return Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived>( + a.derived(), + b.derived(), + x.derived() + ); + } + + /** \returns an expression of the coefficient-wise zeta(\a x, \a q) to the given arrays. * * It returns the Riemann zeta function of two arguments \a x and \a q: diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index ece04b754..7eddc8e7e 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -462,7 +462,7 @@ struct arg_retval template::IsComplex > struct log1p_impl { - static inline Scalar run(const Scalar& x) + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) typedef typename NumTraits::Real RealScalar; @@ -472,7 +472,7 @@ struct log1p_impl } }; -#if EIGEN_HAS_CXX11_MATH +#if EIGEN_HAS_CXX11_MATH && !defined(__CUDACC__) template struct log1p_impl { static inline Scalar run(const Scalar& x) @@ -494,24 +494,26 @@ struct log1p_retval * Implementation of pow * ****************************************************************************/ -template -struct pow_default_impl +template::IsInteger&&NumTraits::IsInteger> +struct pow_impl { - typedef Scalar retval; - static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) + //typedef Scalar retval; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { EIGEN_USING_STD_MATH(pow); return pow(x, y); } }; -template -struct pow_default_impl +template +struct pow_impl { - static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y) + typedef ScalarX result_type; + static EIGEN_DEVICE_FUNC inline ScalarX run(const ScalarX &x, const ScalarY &y) { - Scalar res(1); - eigen_assert(!NumTraits::IsSigned || y >= 0); + ScalarX res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); if(y & 1) res *= x; y >>= 1; while(y) @@ -524,15 +526,6 @@ struct pow_default_impl } }; -template -struct pow_impl : pow_default_impl::IsInteger> {}; - -template -struct pow_retval -{ - typedef Scalar type; -}; - /**************************************************************************** * Implementation of random * ****************************************************************************/ @@ -928,11 +921,11 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -template +template EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) +inline typename internal::pow_impl::result_type pow(const ScalarX& x, const ScalarY& y) { - return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); + return internal::pow_impl::run(x, y); } template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index b8b7f458f..d9d2426ad 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -193,7 +193,7 @@ template class MatrixBase template EIGEN_DEVICE_FUNC - typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType + typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType dot(const MatrixBase& 
other) const; EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; @@ -381,7 +381,7 @@ template class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN /// \internal helper struct to form the return type of the cross product template struct cross_product_return_type { - typedef typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; typedef Matrix type; }; #endif // EIGEN_PARSED_BY_DOXYGEN @@ -403,7 +403,6 @@ template class MatrixBase inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; - inline ScalarMultipleReturnType operator*(const UniformScaling& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) : ColsAtCompileTime==1 ? Vertical : Horizontal }; @@ -416,8 +415,7 @@ template class MatrixBase typedef Block::ColsAtCompileTime==1 ? SizeMinusOne : 1, internal::traits::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne; - typedef CwiseUnaryOp::Scalar>, - const ConstStartMinusOne > HNormalizedReturnType; + typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType; inline const HNormalizedReturnType hnormalized() const; diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index ffb673cee..33908010b 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -39,7 +39,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); return m_expression; } @@ -47,7 +47,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); return m_expression; } @@ -55,7 +55,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); return m_expression; } diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index e065fa714..03f64a8e9 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -22,14 +22,16 @@ namespace Eigen { * This class stores enums, typedefs and static methods giving information about a numeric type. * * The provided data consists of: - * \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real, - * then \a Real is just a typedef to \a T. If \a T is \c std::complex then \a Real + * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, + * then \c Real is just a typedef to \a T. If \a T is \c std::complex then \c Real * is a typedef to \a U. - * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values, + * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, * such as quotients, square roots, etc. 
If \a T is a floating-point type, then this typedef just gives * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex, Literal is defined as \c U. + * Of course, this type must be fully compatible with \a T. In doubt, just use \a T here. * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what * this means, just use \a T here. * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex @@ -84,6 +86,7 @@ template struct GenericNumTraits T >::type NonInteger; typedef T Nested; + typedef T Literal; EIGEN_DEVICE_FUNC static inline Real epsilon() @@ -145,6 +148,7 @@ template struct NumTraits > : GenericNumTraits > { typedef _Real Real; + typedef typename NumTraits<_Real>::Literal Literal; enum { IsComplex = 1, RequireInitialization = NumTraits<_Real>::RequireInitialization, @@ -168,6 +172,7 @@ struct NumTraits > typedef typename NumTraits::NonInteger NonIntegerScalar; typedef Array NonInteger; typedef ArrayType & Nested; + typedef typename NumTraits::Literal Literal; enum { IsComplex = NumTraits::IsComplex, diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 570dbd53b..64f5eb052 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -718,7 +718,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. - internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); return this->derived(); } diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 8aa1de081..ae0c94b38 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -16,39 +16,6 @@ template class Pro namespace internal { -// Determine the scalar of Product. This is normally the same as Lhs::Scalar times -// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. 
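As the removed comment above explains, the scalar type of a product used to be computed by the product_result_scalar helper deleted just below; this patch derives it from ScalarBinaryOpTraits instead (see the new traits line further down in this hunk). A hedged, editorial sketch of what that enables for a user-defined scalar (MyReal is a hypothetical type; a complete integration would also need NumTraits<MyReal> and the arithmetic operators):

```cpp
#include <Eigen/Core>

struct MyReal { double v; };  // hypothetical custom scalar type

namespace Eigen {
// Declare the scalar that a MyReal-by-double operation produces.
// The enable_if guards elsewhere in this patch also test a `Defined`
// flag, so the specialization exposes one (an assumption on our part).
template <typename BinaryOp>
struct ScalarBinaryOpTraits<MyReal, double, BinaryOp> {
  enum { Defined = 1 };
  typedef MyReal ReturnType;
};
}  // namespace Eigen
```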
-template::Shape,
-         typename RhsShape = typename evaluator_traits::Shape >
-struct product_result_scalar
-{
-  typedef typename scalar_product_traits::ReturnType Scalar;
-};
-
-template
-struct product_result_scalar
-{
-  typedef typename Rhs::Scalar Scalar;
-};
-
-template
-  struct product_result_scalar
-{
-  typedef typename Lhs::Scalar Scalar;
-};
-
-template
-struct product_result_scalar
-{
-  typedef typename Rhs::Scalar Scalar;
-};
-
-template
-  struct product_result_scalar
-{
-  typedef typename Lhs::Scalar Scalar;
-};
-
 template
 struct traits >
 {
@@ -59,7 +26,7 @@ struct traits >
   typedef MatrixXpr XprKind;
 
-  typedef typename product_result_scalar::Scalar Scalar;
+  typedef typename ScalarBinaryOpTraits::Scalar, typename traits::Scalar>::ReturnType Scalar;
   typedef typename product_promote_storage_type::ret>::ret StorageKind;
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index cc7166062..77549e709 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -35,22 +35,28 @@ struct evaluator >
   EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
 };
 
-// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
+// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
 // TODO we should apply that rule only if that's really helpful
-template
-struct evaluator_assume_aliasing, const Product > >
+template
+struct evaluator_assume_aliasing,
+                                               const CwiseNullaryOp, Plain1>,
+                                               const Product > >
 {
   static const bool value = true;
 };
-template
-struct evaluator, const Product > >
- : public evaluator,const Lhs>, Rhs, DefaultProduct> >
+template
+struct evaluator,
+                               const CwiseNullaryOp, Plain1>,
+                               const Product > >
+ : public evaluator >
 {
-  typedef CwiseUnaryOp, const Product > XprType;
-  typedef evaluator,const Lhs>, Rhs, DefaultProduct> > Base;
-
+  typedef CwiseBinaryOp,
+                        const CwiseNullaryOp, Plain1>,
+                        const Product > XprType;
+  typedef evaluator > Base;
+
   EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
-    : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
+    : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
   {}
 };
 
@@ -122,14 +128,17 @@ protected:
   PlainObject m_result;
 };
 
+// The following three shortcuts are enabled only if the scalar types match exactly.
+// TODO: we could enable them for different scalar types when the product is not vectorized.
+
 // Dense = Product
 template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
-struct Assignment, internal::assign_op, Dense2Dense,
+struct Assignment, internal::assign_op, Dense2Dense,
                   typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
 {
   typedef Product SrcXprType;
   static EIGEN_STRONG_INLINE
-  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &)
+  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &)
   {
     // FIXME shall we handle nested_eval here?
generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); @@ -138,12 +147,12 @@ struct Assignment, internal::assign_op -struct Assignment, internal::add_assign_op, Dense2Dense, +struct Assignment, internal::add_assign_op, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::addTo(dst, src.lhs(), src.rhs()); @@ -152,12 +161,12 @@ struct Assignment, internal::add_assign_op< // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::sub_assign_op, Dense2Dense, +struct Assignment, internal::sub_assign_op, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::subTo(dst, src.lhs(), src.rhs()); @@ -168,16 +177,17 @@ struct Assignment, internal::sub_assign_op< // Dense ?= scalar * Product // TODO we should apply that rule if that's really helpful // for instance, this is not good for inner products -template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> -struct Assignment, +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> +struct Assignment, const CwiseNullaryOp,Plain>, const Product >, AssignFunc, Dense2Dense, Scalar> { - typedef CwiseUnaryOp, - const Product > SrcXprType; + typedef CwiseBinaryOp, + const CwiseNullaryOp,Plain>, + const Product > SrcXprType; static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { - call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); + call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); } }; @@ -187,37 +197,38 @@ struct Assignment" as well. 
template -struct evaluator_assume_aliasing, const OtherXpr, +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, const Product >, DenseShape > { static const bool value = true; }; -template +template struct assignment_from_xpr_plus_product { - typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + template static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) { - call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.lhs(), Func1()); call_assignment_no_alias(dst, src.rhs(), Func2()); } }; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::assign_op, internal::add_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::assign_op, internal::add_assign_op > {}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::add_assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::add_assign_op, internal::add_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::add_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::add_assign_op, internal::add_assign_op > {}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::sub_assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::sub_assign_op, internal::sub_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::sub_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::sub_assign_op, internal::sub_assign_op > {}; //---------------------------------------- @@ -369,21 +380,21 @@ struct generic_product_impl { // Same as: dst.noalias() = lhs.lazyProduct(rhs); // but easier on the compiler side - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); } template static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() += lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); } template static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() -= lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), 
internal::sub_assign_op()); } // template @@ -735,7 +746,7 @@ template { - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; public: enum { CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 7984cd6e1..b6e8f8887 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -425,7 +425,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { - return derived().redux(Eigen::internal::scalar_min_op()); + return derived().redux(Eigen::internal::scalar_min_op()); } /** \returns the maximum of all coefficients of \c *this. @@ -435,7 +435,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { - return derived().redux(Eigen::internal::scalar_max_op()); + return derived().redux(Eigen::internal::scalar_max_op()); } /** \returns the sum of all coefficients of \c *this @@ -450,7 +450,7 @@ DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return derived().redux(Eigen::internal::scalar_sum_op()); + return derived().redux(Eigen::internal::scalar_sum_op()); } /** \returns the mean of all coefficients of *this @@ -465,7 +465,7 @@ DenseBase::mean() const #pragma warning push #pragma warning ( disable : 2259 ) #endif - return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); #ifdef __INTEL_COMPILER #pragma warning pop #endif diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 6e94181f3..17065fdd5 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -262,7 +262,7 @@ template class Ref< template EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); Base::construct(m_object); } diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 92c541f08..62d4180da 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -129,7 +129,7 @@ template class SelfAdjointView } friend EIGEN_DEVICE_FUNC - const SelfAdjointView,MatrixType>,UpLo> + const SelfAdjointView operator*(const Scalar& s, const SelfAdjointView& mat) { return (s*mat.nestedExpression()).template selfadjointView(); diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 78fff1549..719ed72a5 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -12,11 +12,13 @@ namespace Eigen { +// TODO generalize the scalar type of 'other' + template EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); return derived(); } @@ -24,7 +26,7 @@ template EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); + 
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); return derived(); } @@ -32,7 +34,7 @@ template EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); return derived(); } @@ -40,7 +42,7 @@ template EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); return derived(); } diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index ba2ee53b8..038ad5b11 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -134,10 +134,10 @@ protected: // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? src.dec()._solve_impl(src.rhs(), dst); @@ -146,10 +146,10 @@ struct Assignment, internal::assign_op -struct Assignment,RhsType>, internal::assign_op, Dense2Dense, Scalar> +struct Assignment,RhsType>, internal::assign_op, Dense2Dense, Scalar> { typedef Solve,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } @@ -157,10 +157,11 @@ struct Assignment,RhsType>, internal: // Specialization for "dst = dec.adjoint().solve(rhs)" template -struct Assignment, const Transpose >,RhsType>, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, const Transpose >,RhsType>, + internal::assign_op, Dense2Dense, Scalar> { typedef Solve, const Transpose >,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index f34c7bcda..a657cb854 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -392,17 +392,19 @@ struct igammac_retval { typedef Scalar type; }; -// NOTE: igamma_helper is also used to implement zeta +// NOTE: cephes_helper is also used to implement zeta template -struct igamma_helper { +struct cephes_helper { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 
0.0; } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar biginv() { assert(false && "biginv not supported for this type"); return 0.0; } }; template <> -struct igamma_helper { +struct cephes_helper { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float machep() { return NumTraits::epsilon() / 2; // 1.0 - machep == 1.0 @@ -412,10 +414,15 @@ struct igamma_helper { // use epsneg (1.0 - epsneg == 1.0) return 1.0f / (NumTraits::epsilon() / 2); } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float biginv() { + // epsneg + return machep(); + } }; template <> -struct igamma_helper { +struct cephes_helper { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double machep() { return NumTraits::epsilon() / 2; // 1.0 - machep == 1.0 @@ -424,6 +431,11 @@ struct igamma_helper { static EIGEN_STRONG_INLINE double big() { return 1.0 / NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double biginv() { + // inverse of eps + return NumTraits::epsilon(); + } }; #if !EIGEN_HAS_C99_MATH @@ -538,10 +550,10 @@ struct igammac_impl { const Scalar zero = 0; const Scalar one = 1; const Scalar two = 2; - const Scalar machep = igamma_helper::machep(); + const Scalar machep = cephes_helper::machep(); const Scalar maxlog = numext::log(NumTraits::highest()); - const Scalar big = igamma_helper::big(); - const Scalar biginv = 1 / big; + const Scalar big = cephes_helper::big(); + const Scalar biginv = cephes_helper::biginv(); const Scalar inf = NumTraits::infinity(); Scalar ans, ax, c, yc, r, t, y, z; @@ -590,7 +602,9 @@ struct igammac_impl { qkm2 *= biginv; qkm1 *= biginv; } - if (t <= machep) break; + if (t <= machep) { + break; + } } return (ans * ax); @@ -724,7 +738,7 @@ struct igamma_impl { EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) { const Scalar zero = 0; const Scalar one = 1; - const Scalar machep = igamma_helper::machep(); + const Scalar machep = cephes_helper::machep(); const Scalar maxlog = numext::log(NumTraits::highest()); Scalar ans, ax, c, r; @@ -746,7 +760,9 @@ struct igamma_impl { r += one; c *= x/r; ans += c; - if (c/ans <= machep) break; + if (c/ans <= machep) { + break; + } } return (ans * ax / a); @@ -899,7 +915,7 @@ struct zeta_impl { const Scalar maxnum = NumTraits::infinity(); const Scalar zero = 0.0, half = 0.5, one = 1.0; - const Scalar machep = igamma_helper::machep(); + const Scalar machep = cephes_helper::machep(); const Scalar nan = NumTraits::quiet_NaN(); if( x == one ) @@ -947,8 +963,9 @@ struct zeta_impl { t = a*b/A[i]; s = s + t; t = numext::abs(t/s); - if( t < machep ) - return s; + if( t < machep ) { + break; + } k += one; a *= x + k; b /= w; @@ -1007,6 +1024,467 @@ struct polygamma_impl { #endif // EIGEN_HAS_C99_MATH +/************************************************************************************************ + * Implementation of betainc (incomplete beta integral), based on Cephes but requires C++11/C99 * + ************************************************************************************************/ + +template +struct betainc_retval { + typedef Scalar type; +}; + +#if !EIGEN_HAS_C99_MATH + +template +struct betainc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template +struct betainc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar, Scalar, Scalar) { + /* betaincf.c + * + * Incomplete beta integral + * + * + * SYNOPSIS: + * + * float 
a, b, x, y, betaincf();
+   *
+   * y = betaincf( a, b, x );
+   *
+   *
+   * DESCRIPTION:
+   *
+   * Returns incomplete beta integral of the arguments, evaluated
+   * from zero to x.  The function is defined as
+   *
+   *                   x
+   *      -            -
+   *     | (a+b)      | |  a-1     b-1
+   *   -----------    |   t   (1-t)   dt.
+   *    -     -     | |
+   *   | (a) | (b)   -
+   *                  0
+   *
+   * The domain of definition is 0 <= x <= 1.  In this
+   * implementation a and b are restricted to positive values.
+   * The integral from x to 1 may be obtained by the symmetry
+   * relation
+   *
+   *    1 - betainc( a, b, x )  =  betainc( b, a, 1-x ).
+   *
+   * The integral is evaluated by a continued fraction expansion.
+   * If a < 1, the function calls itself recursively after a
+   * transformation to increase a to a+1.
+   *
+   * ACCURACY (float):
+   *
+   * Tested at random points (a,b,x) with a and b in the indicated
+   * interval and x between 0 and 1.
+   *
+   *    arithmetic   domain     # trials      peak         rms
+   * Relative error:
+   *    IEEE         0,30       10000       3.7e-5      5.1e-6
+   *    IEEE         0,100      10000       1.7e-4      2.5e-5
+   * The useful domain for relative error is limited by underflow
+   * of the single precision exponential function.
+   * Absolute error:
+   *    IEEE         0,30       100000      2.2e-5      9.6e-7
+   *    IEEE         0,100      10000       6.5e-5      3.7e-6
+   *
+   * Larger errors may occur for extreme ratios of a and b.
+   *
+   * ACCURACY (double):
+   *    arithmetic   domain     # trials      peak         rms
+   *    IEEE         0,5        10000       6.9e-15     4.5e-16
+   *    IEEE         0,85       250000      2.2e-13     1.7e-14
+   *    IEEE         0,1000     30000       5.3e-12     6.3e-13
+   *    IEEE         0,10000    250000      9.3e-11     7.1e-12
+   *    IEEE         0,100000   10000       8.7e-10     4.8e-11
+   * Outputs smaller than the IEEE gradual underflow threshold
+   * were excluded from these statistics.
+   *
+   * ERROR MESSAGES:
+   *
+   *   message         condition      value returned
+   * incbet domain      x<0, x>1          nan
+   * incbet underflow                     nan
+   */
+
+    EIGEN_STATIC_ASSERT((internal::is_same::value == false),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return Scalar(0);
+  }
+};
+
+/* Continued fraction expansion #1 for incomplete beta integral (small_branch = True)
+ * Continued fraction expansion #2 for incomplete beta integral (small_branch = False)
+ */
+template
+struct incbeta_cfe {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x, bool small_branch) {
+    EIGEN_STATIC_ASSERT((internal::is_same::value ||
+                         internal::is_same::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    const Scalar big = cephes_helper::big();
+    const Scalar machep = cephes_helper::machep();
+    const Scalar biginv = cephes_helper::biginv();
+
+    const Scalar zero = 0;
+    const Scalar one = 1;
+    const Scalar two = 2;
+
+    Scalar xk, pk, pkm1, pkm2, qk, qkm1, qkm2;
+    Scalar k1, k2, k3, k4, k5, k6, k7, k8, k26update;
+    Scalar ans;
+    int n;
+
+    const int num_iters = (internal::is_same::value) ? 100 : 300;
+    const Scalar thresh =
+        (internal::is_same::value) ? machep : Scalar(3) * machep;
+    Scalar r = (internal::is_same::value) ?
zero : one; + + if (small_branch) { + k1 = a; + k2 = a + b; + k3 = a; + k4 = a + one; + k5 = one; + k6 = b - one; + k7 = k4; + k8 = a + two; + k26update = one; + } else { + k1 = a; + k2 = b - one; + k3 = a; + k4 = a + one; + k5 = one; + k6 = a + b; + k7 = a + one; + k8 = a + two; + k26update = -one; + x = x / (one - x); + } + + pkm2 = zero; + qkm2 = one; + pkm1 = one; + qkm1 = one; + ans = one; + n = 0; + + do { + xk = -(x * k1 * k2) / (k3 * k4); + pk = pkm1 + pkm2 * xk; + qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + xk = (x * k5 * k6) / (k7 * k8); + pk = pkm1 + pkm2 * xk; + qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + if (qk != zero) { + r = pk / qk; + if (numext::abs(ans - r) < numext::abs(r) * thresh) { + return r; + } + ans = r; + } + + k1 += one; + k2 += k26update; + k3 += two; + k4 += two; + k5 += one; + k6 -= k26update; + k7 += two; + k8 += two; + + if ((numext::abs(qk) + numext::abs(pk)) > big) { + pkm2 *= biginv; + pkm1 *= biginv; + qkm2 *= biginv; + qkm1 *= biginv; + } + if ((numext::abs(qk) < biginv) || (numext::abs(pk) < biginv)) { + pkm2 *= big; + pkm1 *= big; + qkm2 *= big; + qkm1 *= big; + } + } while (++n < num_iters); + + return ans; + } +}; + +/* Helper functions depending on the Scalar type */ +template +struct betainc_helper {}; + +template <> +struct betainc_helper { + /* Core implementation, assumes a large (> 1.0) */ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float incbsa(float aa, float bb, + float xx) { + float ans, a, b, t, x, onemx; + bool reversed_a_b = false; + + onemx = 1.0f - xx; + + /* see if x is greater than the mean */ + if (xx > (aa / (aa + bb))) { + reversed_a_b = true; + a = bb; + b = aa; + t = xx; + x = onemx; + } else { + a = aa; + b = bb; + t = onemx; + x = xx; + } + + /* Choose expansion for optimal convergence */ + if (b > 10.0f) { + if (numext::abs(b * x / a) < 0.3f) { + t = betainc_helper::incbps(a, b, x); + if (reversed_a_b) t = 1.0f - t; + return t; + } + } + + ans = x * (a + b - 2.0f) / (a - 1.0f); + if (ans < 1.0f) { + ans = incbeta_cfe::run(a, b, x, true /* small_branch */); + t = b * numext::log(t); + } else { + ans = incbeta_cfe::run(a, b, x, false /* small_branch */); + t = (b - 1.0f) * numext::log(t); + } + + t += a * numext::log(x) + lgamma_impl::run(a + b) - + lgamma_impl::run(a) - lgamma_impl::run(b); + t += numext::log(ans / a); + t = numext::exp(t); + + if (reversed_a_b) t = 1.0f - t; + return t; + } + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float incbps(float a, float b, float x) { + float t, u, y, s; + const float machep = cephes_helper::machep(); + + y = a * numext::log(x) + (b - 1.0f) * numext::log1p(-x) - numext::log(a); + y -= lgamma_impl::run(a) + lgamma_impl::run(b); + y += lgamma_impl::run(a + b); + + t = x / (1.0f - x); + s = 0.0f; + u = 1.0f; + do { + b -= 1.0f; + if (b == 0.0f) { + break; + } + a += 1.0f; + u *= t * b / a; + s += u; + } while (numext::abs(u) > machep); + + return numext::exp(y) * (1.0f + s); + } +}; + +template <> +struct betainc_impl { + EIGEN_DEVICE_FUNC + static float run(float a, float b, float x) { + const float nan = NumTraits::quiet_NaN(); + float ans, t; + + if (a <= 0.0f) return nan; + if (b <= 0.0f) return nan; + if ((x <= 0.0f) || (x >= 1.0f)) { + if (x == 0.0f) return 0.0f; + if (x == 1.0f) return 1.0f; + // mtherr("betaincf", DOMAIN); + return nan; + } + + /* transformation for small aa */ + if (a <= 1.0f) { + ans = betainc_helper::incbsa(a + 1.0f, b, x); + t = a * numext::log(x) + b * numext::log1p(-x) 
+ + lgamma_impl::run(a + b) - lgamma_impl::run(a + 1.0f) - + lgamma_impl::run(b); + return (ans + numext::exp(t)); + } else { + return betainc_helper::incbsa(a, b, x); + } + } +}; + +template <> +struct betainc_helper { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double incbps(double a, double b, double x) { + const double machep = cephes_helper::machep(); + + double s, t, u, v, n, t1, z, ai; + + ai = 1.0 / a; + u = (1.0 - b) * x; + v = u / (a + 1.0); + t1 = v; + t = u; + n = 2.0; + s = 0.0; + z = machep * ai; + while (numext::abs(v) > z) { + u = (n - b) * x / n; + t *= u; + v = t / (a + n); + s += v; + n += 1.0; + } + s += t1; + s += ai; + + u = a * numext::log(x); + // TODO: gamma() is not directly implemented in Eigen. + /* + if ((a + b) < maxgam && numext::abs(u) < maxlog) { + t = gamma(a + b) / (gamma(a) * gamma(b)); + s = s * t * pow(x, a); + } else { + */ + t = lgamma_impl::run(a + b) - lgamma_impl::run(a) - + lgamma_impl::run(b) + u + numext::log(s); + return s = exp(t); + } +}; + +template <> +struct betainc_impl { + EIGEN_DEVICE_FUNC + static double run(double aa, double bb, double xx) { + const double nan = NumTraits::quiet_NaN(); + const double machep = cephes_helper::machep(); + // const double maxgam = 171.624376956302725; + + double a, b, t, x, xc, w, y; + bool reversed_a_b = false; + + if (aa <= 0.0 || bb <= 0.0) { + return nan; // goto domerr; + } + + if ((xx <= 0.0) || (xx >= 1.0)) { + if (xx == 0.0) return (0.0); + if (xx == 1.0) return (1.0); + // mtherr("incbet", DOMAIN); + return nan; + } + + if ((bb * xx) <= 1.0 && xx <= 0.95) { + return betainc_helper::incbps(aa, bb, xx); + } + + w = 1.0 - xx; + + /* Reverse a and b if x is greater than the mean. */ + if (xx > (aa / (aa + bb))) { + reversed_a_b = true; + a = bb; + b = aa; + xc = xx; + x = w; + } else { + a = aa; + b = bb; + xc = w; + x = xx; + } + + if (reversed_a_b && (b * x) <= 1.0 && x <= 0.95) { + t = betainc_helper::incbps(a, b, x); + if (t <= machep) { + t = 1.0 - machep; + } else { + t = 1.0 - t; + } + return t; + } + + /* Choose expansion for better convergence. */ + y = x * (a + b - 2.0) - (a - 1.0); + if (y < 0.0) { + w = incbeta_cfe::run(a, b, x, true /* small_branch */); + } else { + w = incbeta_cfe::run(a, b, x, false /* small_branch */) / xc; + } + + /* Multiply w by the factor + a b _ _ _ + x (1-x) | (a+b) / ( a | (a) | (b) ) . */ + + y = a * numext::log(x); + t = b * numext::log(xc); + // TODO: gamma is not directly implemented in Eigen. + /* + if ((a + b) < maxgam && numext::abs(y) < maxlog && numext::abs(t) < maxlog) + { + t = pow(xc, b); + t *= pow(x, a); + t /= a; + t *= w; + t *= gamma(a + b) / (gamma(a) * gamma(b)); + } else { + */ + /* Resort to logarithms. 
*/ + y += t + lgamma_impl::run(a + b) - lgamma_impl::run(a) - + lgamma_impl::run(b); + y += numext::log(w / a); + t = numext::exp(y); + + /* } */ + // done: + + if (reversed_a_b) { + if (t <= machep) { + t = 1.0 - machep; + } else { + t = 1.0 - t; + } + } + return t; + } +}; + +#endif // EIGEN_HAS_C99_MATH + } // end namespace internal namespace numext { @@ -1022,7 +1500,7 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar) digamma(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x); } - + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(zeta, Scalar) zeta(const Scalar& x, const Scalar& q) { @@ -1059,6 +1537,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar) return EIGEN_MATHFUNC_IMPL(igammac, Scalar)::run(a, x); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar) + betainc(const Scalar& a, const Scalar& b, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x); +} + } // end namespace numext diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 5c5e5028e..c599e0b32 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -367,14 +367,14 @@ template class TriangularViewImpl<_Mat template EIGEN_DEVICE_FUNC TriangularViewType& operator+=(const DenseBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); return derived(); } /** \sa MatrixBase::operator-=() */ template EIGEN_DEVICE_FUNC TriangularViewType& operator-=(const DenseBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -552,7 +552,7 @@ template inline TriangularView& TriangularViewImpl::operator=(const MatrixBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); return derived(); } @@ -794,7 +794,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr enum { unroll = DstXprType::SizeAtCompileTime != Dynamic && SrcEvaluatorType::CoeffReadCost < HugeCost - && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT + && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT }; triangular_assignment_loop::run(kernel); @@ -804,7 +804,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) { - call_triangular_assignment_loop(dst, src, internal::assign_op()); + call_triangular_assignment_loop(dst, src, internal::assign_op()); } template<> struct AssignmentKind { typedef Triangular2Triangular Kind; }; @@ -933,10 +933,10 @@ namespace internal { // Triangular = Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::assign_op::Scalar>, Dense2Triangular, Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.setZero(); 
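    // Editorial note (not part of the upstream patch): for plain assignment the
    // triangular destination is cleared first and the product is then accumulated
    // with alpha = +1; the += and -= specializations below reuse _assignProduct
    // with alpha = +1 and -1 respectively, and deliberately skip the setZero().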
dst._assignProduct(src, 1); @@ -945,10 +945,10 @@ struct Assignment, internal::assign_ // Triangular += Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::add_assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::add_assign_op::Scalar>, Dense2Triangular, Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { dst._assignProduct(src, 1); } @@ -956,10 +956,10 @@ struct Assignment, internal::add_ass // Triangular -= Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::sub_assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::sub_assign_op::Scalar>, Dense2Triangular, Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { dst._assignProduct(src, -1); } diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 193891189..00a4a8c39 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -540,7 +540,7 @@ template class VectorwiseOp /** Returns the expression of the sum of the vector \a other to each subvector of \c *this */ template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC - CwiseBinaryOp, + CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> operator+(const DenseBase& other) const @@ -553,7 +553,7 @@ template class VectorwiseOp /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */ template EIGEN_DEVICE_FUNC - CwiseBinaryOp, + CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> operator-(const DenseBase& other) const diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index d4ce2eaf9..87bdbfd1e 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -480,6 +480,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen: template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) { return Eigen::half(Eigen::numext::igammac(static_cast(a), static_cast(x))); } +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) { + return Eigen::half(Eigen::numext::betainc(static_cast(a), static_cast(b), static_cast(x))); +} #endif } // end namespace numext diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index c90ec96a0..8b5e8204f 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -181,6 +181,24 @@ double2 pigammac(const double2& a, const double2& x) return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pbetainc(const float4& a, const float4& b, const float4& x) +{ + using numext::betainc; + return make_float4( + betainc(a.x, b.x, x.x), + betainc(a.y, b.y, x.y), + betainc(a.z, b.z, x.z), + betainc(a.w, b.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pbetainc(const double2& a, const double2& b, const double2& x) +{ + using numext::betainc; + return make_double2(betainc(a.x, 
b.x, x.x), betainc(a.y, b.y, x.y)); +} + #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 25a59066c..ad66399e0 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -44,8 +44,9 @@ template<> struct packet_traits : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, - HasIgamma = 1, + HasIGamma = 1, HasIGammac = 1, + HasBetaInc = 1, HasBlend = 0, }; @@ -68,10 +69,13 @@ template<> struct packet_traits : default_packet_traits HasRsqrt = 1, HasLGamma = 1, HasDiGamma = 1, + HasZeta = 1, + HasPolygamma = 1, HasErf = 1, HasErfc = 1, HasIGamma = 1, HasIGammac = 1, + HasBetaInc = 1, HasBlend = 0, }; diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 51386506f..959dff886 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -28,6 +28,8 @@ template<> struct packet_traits : default_packet_traits AlignedOnScalar = 1, size=2, HasHalfPacket = 0, + HasAdd = 1, + HasMul = 1, HasDiv = 1, HasSqrt = 1, HasRsqrt = 1, diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index d2d467936..ccc00e5a6 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -14,8 +14,9 @@ namespace Eigen { namespace internal { -static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); -static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); +const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA ); +static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA ); //---------- float ---------- struct Packet2cf @@ -274,7 +275,8 @@ ptranspose(PacketBlock& kernel) { //---------- double ---------- #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG -static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000); +const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 }; +static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA ); struct Packet1cd { diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index deb2d7e42..e1247696d 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -49,17 +49,6 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1(X) -#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG - //Special treatment for Apple's llvm-gcc, its NEON packet types are unions - #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} -#else - //Default initializer for packets - #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} -#endif - - // arm64 does have the pld instruction. 
If available, let's trust the __builtin_prefetch built-in function // which is available on LLVM and GCC (at least) #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC @@ -122,12 +111,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { - Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + const float32_t f[] = {0, 1, 2, 3}; + Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { - Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + const int32_t i[] = {0, 1, 2, 3}; + Packet4i countdown = vld1q_s32(i); return vaddq_s32(pset1(a), countdown); } @@ -585,7 +576,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { r template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { - Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1); + const double countdown_raw[] = {0.0,1.0}; + const Packet2d countdown = vld1q_f64(countdown_raw); return vaddq_f64(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); } diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index 51fef50e8..9b373c783 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -18,20 +18,24 @@ namespace internal { * \brief Template functor for scalar/packet assignment * */ -template struct assign_op { +template struct assign_op { EIGEN_EMPTY_STRUCT_CTOR(assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,b); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,b); } }; -template -struct functor_traits > { + +// Empty overload for void type (used by PermutationMatrix) +template struct assign_op {}; + +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost, - PacketAccess = packet_traits::Vectorizable + Cost = NumTraits::ReadCost, + PacketAccess = is_same::value && packet_traits::Vectorizable && packet_traits::Vectorizable }; }; @@ -39,20 +43,20 @@ struct functor_traits > { * \brief Template functor for scalar/packet assignment with addition * */ -template struct add_assign_op { +template struct add_assign_op { EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = is_same::value && packet_traits::HasAdd }; }; @@ -60,20 +64,20 @@ struct functor_traits > { * \brief Template functor for scalar/packet 
assignment with subtraction * */ -template struct sub_assign_op { +template struct sub_assign_op { EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::AddCost, - PacketAccess = packet_traits::HasSub + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = is_same::value && packet_traits::HasSub }; }; @@ -98,7 +102,6 @@ struct functor_traits > { PacketAccess = is_same::value && packet_traits::HasMul }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; /** \internal * \brief Template functor for scalar/packet assignment with dividing @@ -120,7 +123,6 @@ struct functor_traits > { PacketAccess = is_same::value && packet_traits::HasDiv }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; /** \internal * \brief Template functor for scalar/packet assignment with swapping diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 5cd8ca950..2c1331208 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -16,27 +16,43 @@ namespace internal { //---------- associative binary functors ---------- +template +struct binary_op_base +{ + typedef Arg1 first_argument_type; + typedef Arg2 second_argument_type; +}; + /** \internal * \brief Template functor to compute the sum of two scalars * * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum() */ -template struct scalar_sum_op { -// typedef Scalar result_type; +template +struct scalar_sum_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } +#else + scalar_sum_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::padd(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, // rough estimate! + PacketAccess = is_same::value && packet_traits::HasAdd && packet_traits::HasAdd + // TODO vectorize mixed sum }; }; @@ -45,7 +61,7 @@ struct functor_traits > { * This is required to solve Bug 426. 
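 * For instance (editorial example, not part of the upstream patch), counting
 * positive coefficients should be written as one of:
 * \code
 * ArrayXf a(4); a << -1, 2, 3, -4;
 * Eigen::Index n = (a > 0).count();  // number of true coefficients
 * int s = (a > 0).cast<int>().sum(); // explicit integer sum
 * \endcode
 * rather than summing the boolean expression directly.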
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast() */ -template<> struct scalar_sum_op : scalar_sum_op { +template<> struct scalar_sum_op : scalar_sum_op { EIGEN_DEPRECATED scalar_sum_op() {} }; @@ -56,13 +72,17 @@ template<> struct scalar_sum_op : scalar_sum_op { * * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() */ -template struct scalar_product_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul - }; - typedef typename scalar_product_traits::ReturnType result_type; +template +struct scalar_product_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) +#else + scalar_product_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -75,7 +95,8 @@ template struct functor_traits > { enum { Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! - PacketAccess = scalar_product_op::Vectorizable + PacketAccess = is_same::value && packet_traits::HasMul && packet_traits::HasMul + // TODO vectorize mixed product }; }; @@ -84,13 +105,15 @@ struct functor_traits > { * * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) */ -template struct scalar_conj_product_op { +template +struct scalar_conj_product_op : binary_op_base +{ enum { Conj = NumTraits::IsComplex }; - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const @@ -113,21 +136,24 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() */ -template struct scalar_min_op { +template +struct scalar_min_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmin(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_min(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMin + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = internal::is_same::value && packet_traits::HasMin }; }; @@ -136,21 +162,24 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() */ -template struct scalar_max_op { +template +struct scalar_max_op : binary_op_base +{ + typedef 
typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmax(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_max(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMax + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = internal::is_same::value && packet_traits::HasMax }; }; @@ -158,56 +187,70 @@ struct functor_traits > { * \brief Template functors for comparison of two scalars * \todo Implement packet-comparisons */ -template struct scalar_cmp_op; +template struct scalar_cmp_op; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, PacketAccess = false }; }; -template -struct result_of(Scalar,Scalar)> { +template +struct result_of(LhsScalar,RhsScalar)> { typedef bool type; }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return 
!(a<=b || b<=a);} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;} }; @@ -216,7 +259,9 @@ template struct scalar_cmp_op { * * \sa MatrixBase::stableNorm(), class Redux */ -template struct scalar_hypot_op { +template +struct scalar_hypot_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) // typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const @@ -237,7 +282,7 @@ template struct scalar_hypot_op { } }; template -struct functor_traits > { +struct functor_traits > { enum { Cost = 3 * NumTraits::AddCost + @@ -250,13 +295,24 @@ struct functor_traits > { /** \internal * \brief Template functor to compute the pow of two scalars */ -template struct scalar_binary_pow_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) +template +struct scalar_pow_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN + EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op) +#else + scalar_pow_op() { + typedef Scalar LhsScalar; + typedef Exponent RhsScalar; + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC - inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } + inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; @@ -269,18 +325,27 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::operator- */ -template struct scalar_difference_op { +template +struct scalar_difference_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } +#else + scalar_difference_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::psub(a,b); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasSub + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = is_same::value && packet_traits::HasSub && packet_traits::HasSub }; }; @@ -289,13 +354,17 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, Cwise::operator/() */ -template struct scalar_quotient_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv - }; - typedef typename scalar_product_traits::ReturnType result_type; +template +struct scalar_quotient_op : binary_op_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType 
result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) +#else + scalar_quotient_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -305,7 +374,7 @@ template struct functor_traits > { typedef typename scalar_quotient_op::result_type result_type; enum { - PacketAccess = scalar_quotient_op::Vectorizable, + PacketAccess = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv, Cost = NumTraits::template Div::Cost }; }; @@ -365,14 +434,15 @@ template<> struct functor_traits { * * \sa class CwiseBinaryOp, Cwise::igamma */ -template struct scalar_igamma_op { +template struct scalar_igamma_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { using numext::igamma; return igamma(a, x); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { - return internal::pigammac(a, x); + return internal::pigamma(a, x); } }; template @@ -390,7 +460,8 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, Cwise::igammac */ -template struct scalar_igammac_op { +template struct scalar_igammac_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { using numext::igammac; return igammac(a, x); @@ -413,183 +484,46 @@ struct functor_traits > { //---------- binary functors bound to a constant, thus appearing as a unary functor ---------- -/** \internal - * \brief Template functor to multiply a scalar by a fixed other one - * - * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ - */ -/* NOTE why doing the pset1() in packetOp *is* an optimization ? - * indeed it seems better to declare m_other as a Packet and do the pset1() once - * in the constructor. However, in practice: - * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() outside the loop :) - * - simpler code ;) - * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) - */ -template -struct scalar_multiple_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; +// The following two classes permit turning any binary functor into a unary one with one argument bound to a constant value. 
+// They are analogues to std::binder1st/binder2nd but with the following differences: +// - they are compatible with packetOp +// - they are portable across C++ versions (the std::binder* are deprecated in C++11) +template struct bind1st_op : BinaryOp { -template -struct scalar_multiple2_op { - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; + typedef typename BinaryOp::first_argument_type first_argument_type; + typedef typename BinaryOp::second_argument_type second_argument_type; + typedef typename BinaryOp::result_type result_type; -/** \internal - * \brief Template functor to divide a scalar by a fixed other one - * - * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not necessarily a floating point type. - * - * \sa class CwiseUnaryOp, MatrixBase::operator/ - */ -template -struct scalar_quotient1_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pdiv(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; + bind1st_op(const first_argument_type &val) : m_value(val) {} -template -struct scalar_quotient2_op { - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); } -// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication -// where the mixing of different types is handled by scalar_product_traits -// In particular, real * complex is allowed. 
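The removed note above describes the one mixing of scalar types that has always been sanctioned: real times complex. A minimal sketch of what keeps compiling under the new ScalarBinaryOpTraits regime (editorial illustration, assuming <Eigen/Dense> and <complex> are included):

    // real * complex promotes coefficient-wise to complex
    Eigen::Vector2d  rv(1.0, 2.0);
    Eigen::Vector2cd cv(std::complex<double>(0.0, 1.0),
                        std::complex<double>(3.0, 0.0));
    Eigen::Vector2cd w = rv.cwiseProduct(cv); // (0 + 1i, 6 + 0i)
    Eigen::Vector2cd s = 2.0 * cv;            // real scalar * complex vector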
-// FIXME move this to functor_traits adding a functor_default -template struct functor_is_product_like { enum { ret = 0 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; - - -/** \internal - * \brief Template functor to add a scalar to a fixed other one - * \sa class CwiseUnaryOp, Array::operator+ - */ -/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ -template -struct scalar_add_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::padd(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to subtract a fixed scalar to another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op - */ -template -struct scalar_sub_op { - EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::psub(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to subtract a scalar to fixed another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op - */ -template -struct scalar_rsub_op { - EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::psub(pset1(m_other), a); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to raise a scalar to a power - * \sa class CwiseUnaryOp, Cwise::pow - */ -template -struct scalar_pow_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} - EIGEN_DEVICE_FUNC - inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } - const Scalar m_exponent; -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to compute the quotient between a scalar and array entries. 
- * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_mult_op { - EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; } template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(m_other),a); } - Scalar m_other; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const + { return BinaryOp::packetOp(internal::pset1(m_value), b); } + + first_argument_type m_value; }; -template -struct functor_traits > -{ enum { PacketAccess = packet_traits::HasDiv, Cost = NumTraits::template Div::Cost }; }; +template struct functor_traits > : functor_traits {}; + + +template struct bind2nd_op : BinaryOp { + + typedef typename BinaryOp::first_argument_type first_argument_type; + typedef typename BinaryOp::second_argument_type second_argument_type; + typedef typename BinaryOp::result_type result_type; + + bind2nd_op(const second_argument_type &val) : m_value(val) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return BinaryOp::packetOp(a,internal::pset1(m_value)); } + + second_argument_type m_value; +}; +template struct functor_traits > : functor_traits {}; } // end namespace internal diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 78cc22277..eaa582f23 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -26,7 +26,8 @@ struct scalar_constant_op { }; template struct functor_traits > -{ enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; +{ enum { Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */, + PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; template struct scalar_identity_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) diff --git a/Eigen/src/Core/functors/TernaryFunctors.h b/Eigen/src/Core/functors/TernaryFunctors.h new file mode 100644 index 000000000..8b9e53062 --- /dev/null +++ b/Eigen/src/Core/functors/TernaryFunctors.h @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
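Before the new header's contents, a quick sanity sketch of the scalar entry point it wraps (editorial, not part of the patch; requires EIGEN_HAS_C99_MATH). It exercises the numext::betainc added earlier together with the symmetry relation quoted in the Cephes notes, 1 - betainc(a, b, x) == betainc(b, a, 1 - x):

    #include <cassert>
    #include <cmath>
    void check_betainc_symmetry() {
      double v = Eigen::numext::betainc(2.0, 5.0, 0.25);
      double w = Eigen::numext::betainc(5.0, 2.0, 0.75);
      assert(std::abs((v + w) - 1.0) < 1e-12); // v + w should be 1 up to rounding
    }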
+ +#ifndef EIGEN_TERNARY_FUNCTORS_H +#define EIGEN_TERNARY_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +//---------- associative ternary functors ---------- + +/** \internal + * \brief Template functor to compute the incomplete beta integral betainc(a, b, x) + * + */ +template struct scalar_betainc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_betainc_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& a, const Scalar& b) const { + using numext::betainc; return betainc(x, a, b); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& x, const Packet& a, const Packet& b) const + { + return internal::pbetainc(x, a, b); + } +}; +template +struct functor_traits > { + enum { + // Guesstimate + Cost = 400 * NumTraits::MulCost + 400 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBetaInc + }; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TERNARY_FUNCTORS_H diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 253c03462..63a9fc462 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -363,7 +363,7 @@ class gebp_traits public: typedef _LhsScalar LhsScalar; typedef _RhsScalar RhsScalar; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, @@ -478,7 +478,7 @@ class gebp_traits, RealScalar, _ConjLhs, false> public: typedef std::complex LhsScalar; typedef RealScalar RhsScalar; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 7528fef24..b1465c3b5 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -25,7 +25,7 @@ struct general_matrix_matrix_product Traits; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const LhsScalar* lhs, Index lhsStride, @@ -55,7 +55,7 @@ struct general_matrix_matrix_product Traits; -typedef typename scalar_product_traits::ReturnType ResScalar; +typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static void run(Index rows, Index cols, Index depth, const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 80ba89465..29d6dc721 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -40,7 +40,7 @@ template struct general_matrix_matrix_triangular_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) @@ -57,7 +57,7 @@ template struct general_matrix_matrix_triangular_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename 
ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index fc8886511..4a5cf3fb6 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -58,7 +58,7 @@ namespace internal { template struct general_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable @@ -334,7 +334,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product struct general_matrix_vector_product { -typedef typename scalar_product_traits::ReturnType ResScalar; +typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index f79840aa7..c11a983c7 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -20,7 +20,7 @@ struct triangular_matrix_vector_product; template struct triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -91,7 +91,7 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product struct triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag, diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index c163f1458..a85ad558f 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -293,16 +293,27 @@ struct blas_traits, NestedXpr> > }; // pop scalar multiple -template -struct blas_traits, NestedXpr> > +template +struct blas_traits, const CwiseNullaryOp,Plain>, NestedXpr> > : blas_traits { typedef blas_traits Base; - typedef CwiseUnaryOp, NestedXpr> XprType; + typedef CwiseBinaryOp, const CwiseNullaryOp,Plain>, NestedXpr> XprType; typedef typename Base::ExtractType ExtractType; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } + static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } static inline Scalar extractScalarFactor(const XprType& x) - { return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); } + { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); } +}; +template +struct blas_traits, NestedXpr, const CwiseNullaryOp,Plain> > > + : blas_traits +{ + typedef blas_traits Base; + typedef CwiseBinaryOp, NestedXpr, const CwiseNullaryOp,Plain> > XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; } }; // pop opposite diff --git 
a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index a102e5457..830f20f90 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -91,6 +91,7 @@ template class CwiseNullaryOp; template class CwiseUnaryOp; template class CwiseUnaryView; template class CwiseBinaryOp; +template class CwiseTernaryOp; template class Solve; template class Inverse; @@ -130,6 +131,7 @@ template class ArrayWrapper; template class MatrixWrapper; template class SolverBase; template class InnerIterator; +template struct ScalarBinaryOpTraits; namespace internal { template struct kernel_retval_base; @@ -174,9 +176,11 @@ namespace internal { // with optional conjugation of the arguments. template struct conj_helper; -template struct scalar_sum_op; -template struct scalar_difference_op; -template struct scalar_conj_product_op; +template struct scalar_sum_op; +template struct scalar_difference_op; +template struct scalar_conj_product_op; +template struct scalar_min_op; +template struct scalar_max_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; template struct scalar_real_op; @@ -192,27 +196,22 @@ template struct scalar_sin_op; template struct scalar_acos_op; template struct scalar_asin_op; template struct scalar_tan_op; -template struct scalar_pow_op; template struct scalar_inverse_op; template struct scalar_square_op; template struct scalar_cube_op; template struct scalar_cast_op; -template struct scalar_multiple_op; -template struct scalar_quotient1_op; -template struct scalar_min_op; -template struct scalar_max_op; template struct scalar_random_op; -template struct scalar_add_op; template struct scalar_constant_op; template struct scalar_identity_op; template struct scalar_sign_op; template struct scalar_igamma_op; template struct scalar_igammac_op; +template struct scalar_betainc_op; +template struct scalar_pow_op; +template struct scalar_hypot_op; template struct scalar_product_op; -template struct scalar_multiple2_op; template struct scalar_quotient_op; -template struct scalar_quotient2_op; } // end namespace internal diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index c9a0b9893..6de21d2bb 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -462,6 +462,8 @@ #define EIGEN_CAT2(a,b) a ## b #define EIGEN_CAT(a,b) EIGEN_CAT2(a,b) +#define EIGEN_COMMA , + // convert a token to a string #define EIGEN_MAKESTRING2(a) #a #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a) @@ -876,18 +878,10 @@ namespace Eigen { #define EIGEN_IMPLIES(a,b) (!(a) || (b)) -#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \ - template \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ - (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ - { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ - } - -// the expression type of a cwise product -#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \ +// the expression type of a standard coefficient wise binary operation +#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ CwiseBinaryOp< \ - internal::scalar_product_op< \ + EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \ typename internal::traits::Scalar, \ typename internal::traits::Scalar \ >, \ @@ -895,6 +889,45 @@ namespace Eigen { const RHS \ > +#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const 
EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \ + (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ + { \ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \ + } + +#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \ + CwiseBinaryOp::Scalar,SCALAR>, const EXPR, \ + const typename internal::plain_constant_type::type> + +#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \ + CwiseBinaryOp::Scalar>, \ + const typename internal::plain_constant_type::type, const EXPR> + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ + template EIGEN_DEVICE_FUNC inline \ + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg >::Defined>::type,OPNAME) \ + (METHOD)(const T& scalar) const { \ + typedef typename internal::promote_scalar_arg >::Defined>::type PromotedT; \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), internal::scalar_constant_op(scalar))); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + template EIGEN_DEVICE_FUNC inline friend \ + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg >::Defined>::type,Derived,OPNAME) \ + (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ + typedef typename internal::promote_scalar_arg >::Defined>::type PromotedT; \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op(scalar)), matrix.derived()); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) + + #ifdef EIGEN_EXCEPTIONS # define EIGEN_THROW_X(X) throw X # define EIGEN_THROW throw diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 7ecd59add..a4a491ff8 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -328,6 +328,30 @@ struct result_of { enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; typedef typename binary_result_of_select::type type; }; + +template +struct ternary_result_of_select {typedef typename internal::remove_all::type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::result_type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::template result::type type;}; + +template +struct result_of { + template + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template + static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; + typedef typename ternary_result_of_select::type type; +}; #endif /** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer. 
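The EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT/ONTHELEFT macros above turn `expr OP scalar` into a true binary expression: the scalar operand is first promoted through internal::promote_scalar_arg (see the XprHelper.h hunk below) and then materialized as a CwiseNullaryOp constant of the expression's shape. A minimal usage sketch of what this enables, assuming a build with this patch applied (Eigen 3.3-style API):

#include <complex>
#include <Eigen/Dense>

int main()
{
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 0.0, 3.0);

  // Generated by EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum) and (operator-,difference):
  // the int literal is promoted to double (NumTraits<double>::Literal) and wrapped
  // into a constant expression, so both operands are coefficient-wise expressions.
  Eigen::ArrayXd b = a + 1;  // ONTHERIGHT overload
  Eigen::ArrayXd c = 2 - a;  // ONTHELEFT overload

  // ScalarBinaryOpTraits<double, std::complex<double> > marks the mixed product
  // as Defined, so real-by-complex scaling now compiles out of the box:
  Eigen::VectorXcd v = Eigen::VectorXd::Ones(3) * std::complex<double>(0.0, 1.0);

  (void)b; (void)c; (void)v;
  return 0;
}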
@@ -375,33 +399,6 @@ template struct scalar_product_traits enum { Defined = 0 }; }; -template struct scalar_product_traits -{ - enum { - // Cost = NumTraits::MulCost, - Defined = 1 - }; - typedef T ReturnType; -}; - -template struct scalar_product_traits > -{ - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; -}; - -template struct scalar_product_traits, T> -{ - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; -}; - // FIXME quick workaround around current limitation of result_of // template // struct result_of(ArgType0,ArgType1)> { @@ -434,6 +431,67 @@ T div_ceil(const T &a, const T &b) } // end namespace numext + +/** \class ScalarBinaryOpTraits + * \ingroup Core_Module + * + * \brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is. + * + * \sa CwiseBinaryOp + */ +template +struct ScalarBinaryOpTraits +#ifndef EIGEN_PARSED_BY_DOXYGEN + // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class. + : internal::scalar_product_traits +#endif // EIGEN_PARSED_BY_DOXYGEN +{}; + +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +// For Matrix * Permutation +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +// For Permutation * Matrix +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +// for Permutation*Permutation +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef void ReturnType; +}; + +template +struct ScalarBinaryOpTraits,BinaryOp> +{ + enum { Defined = 1 }; + typedef std::complex ReturnType; +}; + +template +struct ScalarBinaryOpTraits, T,BinaryOp> +{ + enum { Defined = 1 }; + typedef std::complex ReturnType; +}; + } // end namespace Eigen #endif // EIGEN_META_H diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 6faaf889a..bee80ca38 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -98,7 +98,9 @@ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, - THIS_TYPE_IS_NOT_SUPPORTED + THIS_TYPE_IS_NOT_SUPPORTED, + STORAGE_KIND_MUST_MATCH, + STORAGE_INDEX_MUST_MATCH }; }; diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 3605de6fd..b372ac1ad 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -45,6 +45,34 @@ inline IndexDest convert_index(const IndexSrc& idx) { } +// promote_scalar_arg is an helper used in operation between an expression and a scalar, like: +// expression * scalar +// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression. +// The IsSupported template parameter must be provided by the caller as: ScalarBinaryOpTraits::Defined using the proper order for ExprScalar and T. +// Then the logic is as follows: +// - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned. +// - otherwise, NumTraits::Literal is returned if T is implicitly convertible to NumTraits::Literal AND that this does not imply a float to integer conversion. 
+// - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE). +template::Literal>::value, + bool IsSafe = NumTraits::IsInteger || !NumTraits::Literal>::IsInteger> +struct promote_scalar_arg +{ +}; + +template +struct promote_scalar_arg +{ + typedef T type; +}; + +template +struct promote_scalar_arg +{ + typedef typename NumTraits::Literal type; +}; + //classes inheriting no_assignment_operator don't generate a default operator=. class no_assignment_operator { @@ -576,6 +604,20 @@ struct plain_diag_type >::type type; }; +template +struct plain_constant_type +{ + enum { Options = (traits::Flags&RowMajorBit)?RowMajor:0 }; + + typedef Array::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> array_type; + + typedef Matrix::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> matrix_type; + + typedef CwiseNullaryOp, const typename conditional::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type; +}; + template struct is_lvalue { @@ -610,11 +652,6 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if struct is_same_or_void { enum { value = is_same::value }; }; -template struct is_same_or_void { enum { value = 1 }; }; -template struct is_same_or_void { enum { value = 1 }; }; -template<> struct is_same_or_void { enum { value = 1 }; }; - #ifdef EIGEN_DEBUG_ASSIGN std::string demangle_traversal(int t) { @@ -649,17 +686,12 @@ std::string demangle_flags(int f) } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. +// We require Lhs and Rhs to have "compatible" scalar types. // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to // add together a float matrix and a double matrix. #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ - ? 
int(internal::scalar_product_traits::Defined) \ : int(internal::is_same_or_void::value)), \ + EIGEN_STATIC_ASSERT(int(ScalarBinaryOpTraits::Defined), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) } // end namespace Eigen diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index a9d6790d5..650617ca7 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -327,13 +327,22 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } else { - Scalar p = Scalar(0.5) * (m_matS.coeff(i, i) - m_matS.coeff(i+1, i+1)); - Scalar z = sqrt(abs(p * p + m_matS.coeff(i+1, i) * m_matS.coeff(i, i+1))); - m_alphas.coeffRef(i) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, z); - m_alphas.coeffRef(i+1) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, -z); + // We need to extract the generalized eigenvalues of the pencil formed by a general 2x2 block S and a positive diagonal 2x2 block T. + // Taking beta = T_00*T_11, we avoid any division, and the alphas are then the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00): - m_betas.coeffRef(i) = m_realQZ.matrixT().coeff(i,i); - m_betas.coeffRef(i+1) = m_realQZ.matrixT().coeff(i,i); + // T = [a 0] + // [0 b] + RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i+1, i+1); + Matrix S2 = m_matS.template block<2,2>(i,i) * Matrix(b,a).asDiagonal(); + + Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1)); + Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1))); + m_alphas.coeffRef(i) = ComplexScalar(S2.coeff(1,1) + p, z); + m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z); + + m_betas.coeffRef(i) = + m_betas.coeffRef(i+1) = a*b; + i += 2; } } diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index a62071d42..b3a910dd9 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -552,7 +552,6 @@ namespace Eigen { m_T.coeffRef(l,l-1) = Scalar(0.0); } - template RealQZ& RealQZ::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ) { @@ -616,6 +615,37 @@ namespace Eigen { } // check if we converged before reaching iterations limit m_info = (local_iter<m_maxIters) ? Success : NoConvergence; + + // For each non-triangular 2x2 diagonal block of S, reduce the respective 2x2 diagonal block of T to positive diagonal form using a 2x2 SVD. + // This step is not mandatory for QZ, but it does help further extraction of eigenvalues/eigenvectors, and is on par with Lapack/Matlab QZ. + if(m_info==Success) + { + for(Index i=0; i<dim-1; ++i) + { + if(m_S.coeff(i+1, i) != Scalar(0)) + { + JacobiRotation<Scalar> j_left, j_right; + internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right); + + // Apply resulting Jacobi rotations + m_S.applyOnTheLeft(i,i+1,j_left); + m_S.applyOnTheRight(i,i+1,j_right); + m_T.applyOnTheLeft(i,i+1,j_left); + m_T.applyOnTheRight(i,i+1,j_right); + m_T(i+1,i) = m_T(i,i+1) = Scalar(0); + + if(m_computeQZ) { + m_Q.applyOnTheRight(i,i+1,j_left.transpose()); + m_Z.applyOnTheLeft(i,i+1,j_right.transpose()); + } + + i++; + } + } + } + return *this; } // end compute diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h index 2030b5be1..1d102c17b 100644 --- a/Eigen/src/Eigenvalues/Tridiagonalization.h +++ b/Eigen/src/Eigenvalues/Tridiagonalization.h @@ -367,10 +367,10 @@ void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs) hCoeffs.tail(n-i-1).noalias() = (matA.bottomRightCorner(remainingSize,remainingSize).template selfadjointView() * (conj(h) * matA.col(i).tail(remainingSize))); - hCoeffs.tail(n-i-1) += (conj(h)*Scalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1); + hCoeffs.tail(n-i-1) += (conj(h)*RealScalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) *
matA.col(i).tail(n-i-1); matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView() - .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), -1); + .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), Scalar(-1)); matA.col(i).coeffRef(i+1) = beta; hCoeffs.coeffRef(i) = h; diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index 03f1a11f8..d20d17492 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -36,8 +36,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) typedef NumTraits ScalarTraits; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef typename ScalarTraits::Real RealScalar; - typedef typename ScalarTraits::NonInteger NonInteger; + typedef typename ScalarTraits::NonInteger NonInteger; typedef Matrix VectorType; + typedef CwiseBinaryOp, const VectorType, const VectorType> VectorTypeSum; /** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */ enum CornerType @@ -111,16 +112,15 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) inline VectorType& (max)() { return m_max; } /** \returns the center of the box */ - inline const CwiseUnaryOp, - const CwiseBinaryOp, const VectorType, const VectorType> > + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient) center() const - { return (m_min+m_max)/2; } + { return (m_min+m_max)/RealScalar(2); } /** \returns the lengths of the sides of the bounding box. * Note that this function does not get the same * result for integral or floating scalar types: see */ - inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const + inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const { return m_max - m_min; } /** \returns the volume of the bounding box */ @@ -131,7 +131,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * if the length of the diagonal is needed: diagonal().norm() * will provide it. 
*/ - inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const + inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const { return sizes(); } /** \returns the vertex of the bounding box at the corner defined by diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index cd52b5470..1c35ca486 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -329,10 +329,10 @@ protected: // dense = homogeneous template< typename DstXprType, typename ArgType, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template topRows(src.nestedExpression().rows()) = src.nestedExpression(); dst.row(dst.rows()-1).setOnes(); @@ -341,10 +341,10 @@ struct Assignment, internal::assign_op // dense = homogeneous template< typename DstXprType, typename ArgType, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template leftCols(src.nestedExpression().cols()) = src.nestedExpression(); dst.col(dst.cols()-1).setOnes(); @@ -373,7 +373,7 @@ struct homogeneous_right_product_refactoring_helper typedef typename Rhs::ConstRowXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; - typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; + typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; }; template @@ -414,7 +414,7 @@ struct homogeneous_left_product_refactoring_helper typedef typename Lhs::ConstColXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; - typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; + typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; }; template diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h index 643138199..3e12681b0 100644 --- a/Eigen/src/Geometry/Scaling.h +++ b/Eigen/src/Geometry/Scaling.h @@ -107,12 +107,15 @@ public: /** \addtogroup Geometry_Module */ //@{ -/** Concatenates a linear transformation matrix and a uniform scaling */ +/** Concatenates a linear transformation matrix and a uniform scaling + * \relates UniformScaling + */ // NOTE this operator is defiend in MatrixBase and not as a friend function // of UniformScaling to fix an internal crash of Intel's ICC -template typename MatrixBase::ScalarMultipleReturnType -MatrixBase::operator*(const UniformScaling& s) const -{ return derived() * s.factor(); } +template +EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,product) +operator*(const MatrixBase& matrix, const UniformScaling& s) +{ return matrix.derived() * s.factor(); } /** Constructs a uniform scaling from scale factor \a s */ static inline UniformScaling Scaling(float s) { return UniformScaling(s); } diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 4fc876bcf..073f4dcd1 100644 --- a/Eigen/src/Geometry/Transform.h +++ 
b/Eigen/src/Geometry/Transform.h @@ -1367,7 +1367,7 @@ struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES); Matrix rhs; - rhs << other,1; + rhs.template head() = other; rhs[Dim] = typename ResultType::Scalar(1); Matrix res(T.matrix() * rhs); return res.template head(); } diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h index a57f81764..3ce0a693d 100644 --- a/Eigen/src/Householder/HouseholderSequence.h +++ b/Eigen/src/Householder/HouseholderSequence.h @@ -108,7 +108,7 @@ struct hseq_side_dependent_impl template struct matrix_type_times_scalar_type { - typedef typename scalar_product_traits::ReturnType + typedef typename ScalarBinaryOpTraits::ReturnType ResultScalar; typedef Matrix Type; diff --git a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h index 35923be3d..7d67d3ce2 100644 --- a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +++ b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h @@ -91,10 +91,10 @@ protected: // Specialization for "dst = dec.solveWithGuess(rhs)" // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef SolveWithGuess SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? dst = src.guess(); diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index c39f8e3d5..2d01b18c6 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -839,12 +839,12 @@ namespace internal { /***** Implementation of inverse() *****************************************************/ -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef FullPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h index e202a55cb..3134632e1 100644 --- a/Eigen/src/LU/InverseImpl.h +++ b/Eigen/src/LU/InverseImpl.h @@ -286,11 +286,11 @@ struct compute_inverse_and_det_with_check namespace internal { // Specialization for "dense = dense_xpr.inverse()" -template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment, internal::assign_op, Dense2Dense> { typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? 
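// dst = xpr.inverse() is evaluated in one shot here; compute_inverse picks the analytic cofactor formulas for fixed sizes up to 4x4 and falls back to a PartialPivLU-based path otherwise.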
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index b68916287..ac2902261 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -525,12 +525,12 @@ MatrixType PartialPivLU::reconstructedMatrix() const namespace internal { /***** Implementation of inverse() *****************************************************/ -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef PartialPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index 80d914f25..091c3970e 100644 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -183,7 +183,7 @@ class PardisoImpl : public SparseSolverBase { if(m_isInitialized) // Factorization ran at least once { - internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0, + internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, internal::convert_index(m_size),0, 0, 0, m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); m_isInitialized = false; } @@ -194,11 +194,11 @@ class PardisoImpl : public SparseSolverBase m_type = type; bool symmetric = std::abs(m_type) < 10; m_iparm[0] = 1; // No solver default - m_iparm[1] = 3; // use Metis for the ordering - m_iparm[2] = 1; // Numbers of processors, value of OMP_NUM_THREADS + m_iparm[1] = 2; // use Metis for the ordering + m_iparm[2] = 0; // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??) m_iparm[3] = 0; // No iterative-direct algorithm m_iparm[4] = 0; // No user fill-in reducing permutation - m_iparm[5] = 0; // Write solution into x + m_iparm[5] = 0; // Write solution into x, b is left unchanged m_iparm[6] = 0; // Not in use m_iparm[7] = 2; // Max numbers of iterative refinement steps m_iparm[8] = 0; // Not in use @@ -219,7 +219,8 @@ class PardisoImpl : public SparseSolverBase m_iparm[26] = 0; // No matrix checker m_iparm[27] = (sizeof(RealScalar) == 4) ? 
1 : 0; m_iparm[34] = 1; // C indexing - m_iparm[59] = 1; // Automatic switch between In-Core and Out-of-Core modes + m_iparm[36] = 0; // CSR + m_iparm[59] = 0; // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core memset(m_pt, 0, sizeof(m_pt)); } @@ -246,7 +247,7 @@ class PardisoImpl : public SparseSolverBase mutable SparseMatrixType m_matrix; mutable ComputationInfo m_info; bool m_analysisIsOk, m_factorizationIsOk; - Index m_type, m_msglvl; + StorageIndex m_type, m_msglvl; mutable void *m_pt[64]; mutable ParameterType m_iparm; mutable IntColVectorType m_perm; @@ -265,10 +266,9 @@ Derived& PardisoImpl::compute(const MatrixType& a) derived().getMatrix(a); Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); - manageErrorCode(error); m_analysisIsOk = true; m_factorizationIsOk = true; @@ -287,7 +287,7 @@ Derived& PardisoImpl::analyzePattern(const MatrixType& a) derived().getMatrix(a); Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); @@ -306,8 +306,8 @@ Derived& PardisoImpl::factorize(const MatrixType& a) derived().getMatrix(a); - Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 22, m_size, + Index error; + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 22, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); @@ -354,9 +354,9 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase } Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), - m_perm.data(), nrhs, m_iparm.data(), m_msglvl, + m_perm.data(), internal::convert_index(nrhs), m_iparm.data(), m_msglvl, rhs_ptr, x.derived().data()); manageErrorCode(error); @@ -371,6 +371,9 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase * using the Intel MKL PARDISO library. The sparse matrix A must be squared and invertible. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept @@ -421,6 +424,9 @@ class PardisoLU : public PardisoImpl< PardisoLU > * using the Intel MKL PARDISO library. The sparse matrix A must be selfajoint and positive definite. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam UpLo can be any bitwise combination of Upper, Lower. 
The default is Upper, meaning only the upper triangular part has to be used. * Upper|Lower can be used to tell both triangular parts can be used as input. @@ -480,6 +486,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > * For complex matrices, A can also be symmetric only, see the \a Options template parameter. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. The default is Upper, meaning only the upper triangular part has to be used. * Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix. diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 7c559f952..525ee8c18 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -598,11 +598,11 @@ void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType & namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef ColPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 230d0d23c..52bcc2173 100644 --- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -510,11 +510,11 @@ void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef CompleteOrthogonalDecomposition CodType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows())); } diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index 32a10f3fe..4f55d52a5 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -560,11 +560,11 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef FullPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 1940c8294..b83fd7a4d 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -419,38 +419,6 @@ struct svd_precondition_2x2_block_to_be_real } }; 
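The helper removed below is relocated, unchanged, into the new header src/misc/RealSvd2x2.h added near the end of this patch, so that RealQZ (see the RealQZ.h hunk above) can share it with JacobiSVD. A minimal sketch of what this internal helper computes, assuming the post-patch headers (internal API, subject to change):

#include <iostream>
#include <Eigen/Dense>

int main()
{
  Eigen::Matrix2d m;
  m << 3.0, 1.0,
       2.0, 4.0;

  // Compute the two rotations of the real 2x2 SVD of m: applying j_left on the
  // left and j_right on the right leaves m diagonal (all template arguments
  // are deduced from the call).
  Eigen::JacobiRotation<double> j_left, j_right;
  Eigen::internal::real_2x2_jacobi_svd(m, 0, 1, &j_left, &j_right);

  m.applyOnTheLeft(0, 1, j_left);
  m.applyOnTheRight(0, 1, j_right);
  std::cout << m << "\n";  // off-diagonal entries are now zero up to round-off
  return 0;
}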
-template -void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, - JacobiRotation *j_left, - JacobiRotation *j_right) -{ - using std::sqrt; - using std::abs; - Matrix m; - m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), - numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); - JacobiRotation rot1; - RealScalar t = m.coeff(0,0) + m.coeff(1,1); - RealScalar d = m.coeff(1,0) - m.coeff(0,1); - if(d == RealScalar(0)) - { - rot1.s() = RealScalar(0); - rot1.c() = RealScalar(1); - } - else - { - // If d!=0, then t/d cannot overflow because the magnitude of the - // entries forming d are not too small compared to the ones forming t. - RealScalar u = t / d; - RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); - rot1.s() = RealScalar(1) / tmp; - rot1.c() = u / tmp; - } - m.applyOnTheLeft(0,1,rot1); - j_right->makeJacobi(m,0,1); - *j_left = rot1 * j_right->transpose(); -} - template struct traits > { diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 4a8dd12e4..b284fa9e4 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -34,8 +34,8 @@ template inline Derived& SparseMatrixBase::operator=(const SparseMatrixBase& other) { // by default sparse evaluation do not alias, so we can safely bypass the generic call_assignment routine - internal::Assignment > - ::run(derived(), other.derived(), internal::assign_op()); + internal::Assignment > + ::run(derived(), other.derived(), internal::assign_op()); return derived(); } @@ -127,7 +127,7 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src) template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { assign_sparse_to_sparse(dst.derived(), src.derived()); } @@ -141,7 +141,7 @@ struct Assignment { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - if(internal::is_same >::value) + if(internal::is_same >::value) dst.setZero(); internal::evaluator srcEval(src); @@ -156,10 +156,10 @@ struct Assignment // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error template -struct Assignment, internal::assign_op, Sparse2Sparse, Scalar> +struct Assignment, internal::assign_op, Sparse2Sparse, Scalar> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec()._solve_impl(src.rhs(), dst); } @@ -176,7 +176,7 @@ struct Assignment typedef Array ArrayXI; typedef Array ArrayXS; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { Index size = src.diagonal().size(); dst.makeCompressed(); @@ -187,15 +187,15 @@ struct Assignment } template - static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.diagonal() = src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + static void run(DstXprType 
&dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; } // end namespace internal diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index d422f3cbe..aad7b7d79 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -28,6 +28,9 @@ namespace Eigen { // generic sparse // 4 - dense op dense product dense // generic dense +// +// TODO to ease compiler job, we could specialize product/quotient with a scalar +// and fallback to cwise-unary evaluator using bind1st_op and bind2nd_op. template class CwiseBinaryOpImpl @@ -323,12 +326,12 @@ protected: }; // "sparse .* sparse" -template -struct binary_evaluator, Lhs, Rhs>, IteratorBased, IteratorBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IteratorBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef typename evaluator::InnerIterator LhsIterator; typedef typename evaluator::InnerIterator RhsIterator; typedef CwiseBinaryOp XprType; @@ -407,12 +410,12 @@ protected: }; // "dense .* sparse" -template -struct binary_evaluator, Lhs, Rhs>, IndexBased, IteratorBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IndexBased, IteratorBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef evaluator LhsEvaluator; typedef typename evaluator::InnerIterator RhsIterator; typedef CwiseBinaryOp XprType; @@ -480,12 +483,12 @@ protected: }; // "sparse .* dense" -template -struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef typename evaluator::InnerIterator LhsIterator; typedef evaluator RhsEvaluator; typedef CwiseBinaryOp XprType; @@ -579,7 +582,7 @@ template template Derived& SparseMatrixBase::operator+=(const DiagonalBase& other) { - call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -587,7 +590,7 @@ template template Derived& SparseMatrixBase::operator-=(const DiagonalBase& other) { - call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -600,31 +603,31 @@ SparseMatrixBase::cwiseProduct(const MatrixBase &other) c } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> operator+(const MatrixBase &a, const SparseMatrixBase &b) { - return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> 
+EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> operator+(const SparseMatrixBase &a, const MatrixBase &b) { - return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> operator-(const MatrixBase &a, const SparseMatrixBase &b) { - return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> operator-(const SparseMatrixBase &a, const MatrixBase &b) { - return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 476796dd7..0547db596 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -74,7 +74,7 @@ struct sparse_time_dense_product_impl let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators // template -// struct scalar_product_traits > +// struct ScalarBinaryOpTraits > // { // enum { // Defined = 1 @@ -97,7 +97,7 @@ struct sparse_time_dense_product_impl::ReturnType rhs_j(alpha * rhs.coeff(j,c)); + typename ScalarBinaryOpTraits::ReturnType rhs_j(alpha * rhs.coeff(j,c)); for(LhsInnerIterator it(lhsEval,j); it ;++it) res.coeffRef(it.index(),c) += it.value() * rhs_j; } diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index a78bd57c3..531fea399 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -440,7 +440,7 @@ class SparseMatrix template void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); - void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } + void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } template void collapseDuplicates(DupFunctor dup_func = DupFunctor()); @@ -979,7 +979,7 @@ template template void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); + internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); } /** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 24df36884..45f64e7f2 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -256,7 +256,7 @@ template class SparseMatrixBase Derived& operator/=(const Scalar& other); template struct CwiseProductDenseReturnType { - typedef CwiseBinaryOp::Scalar, typename internal::traits::Scalar >::ReturnType>, diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index b23003bb1..84e69903b 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ 
b/Eigen/src/SparseCore/SparseProduct.h @@ -99,10 +99,10 @@ struct generic_product_impl -struct Assignment, internal::assign_op, Sparse2Dense> +struct Assignment, internal::assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { generic_product_impl::evalTo(dst,src.lhs(),src.rhs()); } @@ -110,10 +110,10 @@ struct Assignment, internal::assig // dense += sparse-product (can be sparse*sparse, sparse*perm, etc.) template< typename DstXprType, typename Lhs, typename Rhs> -struct Assignment, internal::add_assign_op, Sparse2Dense> +struct Assignment, internal::add_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { generic_product_impl::addTo(dst,src.lhs(),src.rhs()); } @@ -121,10 +121,10 @@ struct Assignment, internal::add_a // dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.) template< typename DstXprType, typename Lhs, typename Rhs> -struct Assignment, internal::sub_assign_op, Sparse2Dense> +struct Assignment, internal::sub_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { generic_product_impl::subTo(dst,src.lhs(),src.rhs()); } diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index b92bb17e2..4f0c84d88 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -223,13 +223,13 @@ struct Assignment - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { internal::permute_symm_to_fullsymm(src.matrix(), dst); } template - static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) { // TODO directly evaluate into dst; SparseMatrix tmp(dst.rows(),dst.cols()); @@ -586,12 +586,12 @@ class SparseSymmetricPermutationProduct namespace internal { template -struct Assignment, internal::assign_op, Sparse2Sparse> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseSymmetricPermutationProduct SrcXprType; typedef typename DstXprType::StorageIndex DstIndex; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) { // internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); SparseMatrix tmp; @@ -600,7 +600,7 @@ struct Assignment } template - static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) + static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) { internal::permute_symm_to_symm(src.matrix(),dst.matrix(),src.perm().indices().data()); } diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index acd7f7e10..2d4498b03 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ 
b/Eigen/src/SparseQR/SparseQR.h @@ -705,12 +705,12 @@ struct evaluator_traits > }; template< typename DstXprType, typename SparseQRType> -struct Assignment, internal::assign_op, Sparse2Sparse> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); idMat.setIdentity(); @@ -721,12 +721,12 @@ struct Assignment, internal: }; template< typename DstXprType, typename SparseQRType> -struct Assignment, internal::assign_op, Sparse2Dense> +struct Assignment, internal::assign_op, Sparse2Dense> { typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows()); } diff --git a/Eigen/src/misc/RealSvd2x2.h b/Eigen/src/misc/RealSvd2x2.h new file mode 100644 index 000000000..dfaaa0b17 --- /dev/null +++ b/Eigen/src/misc/RealSvd2x2.h @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Benoit Jacob +// Copyright (C) 2013-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REALSVD2X2_H +#define EIGEN_REALSVD2X2_H + +namespace Eigen { + +namespace internal { + +template +void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, + JacobiRotation *j_left, + JacobiRotation *j_right) +{ + using std::sqrt; + using std::abs; + Matrix m; + m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), + numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); + JacobiRotation rot1; + RealScalar t = m.coeff(0,0) + m.coeff(1,1); + RealScalar d = m.coeff(1,0) - m.coeff(0,1); + if(d == RealScalar(0)) + { + rot1.s() = RealScalar(0); + rot1.c() = RealScalar(1); + } + else + { + // If d!=0, then t/d cannot overflow because the magnitude of the + // entries forming d are not too small compared to the ones forming t. 
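+ // The rotation below, with u = t/d, has s = 1/sqrt(1+u^2) and c = u/sqrt(1+u^2), + // hence s*t == c*d: applied on the left it cancels the antisymmetric part of m, + // so the rotated m is symmetric and makeJacobi() can finish the diagonalization.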
+ RealScalar u = t / d; + RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); + rot1.s() = RealScalar(1) / tmp; + rot1.c() = u / tmp; + } + m.applyOnTheLeft(0,1,rot1); + j_right->makeJacobi(m,0,1); + *j_left = rot1 * j_right->transpose(); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_REALSVD2X2_H diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index c3f8c2575..19e25ab62 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -1,13 +1,14 @@ + /** \returns an expression of the coefficient wise product of \c *this and \a other * * \sa MatrixBase::cwiseProduct */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived) +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product) operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived()); + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived()); } /** \returns an expression of the coefficient wise quotient of \c *this and \a other * @@ -16,10 +17,10 @@ operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of \c *this and \a other * @@ -29,14 +30,14 @@ operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * * \sa max() */ -EIGEN_MAKE_CWISE_BINARY_OP(min,internal::scalar_min_op) +EIGEN_MAKE_CWISE_BINARY_OP(min,min) /** \returns an expression of the coefficient-wise min of \c *this and scalar \a other * * \sa max() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > #ifdef EIGEN_PARSED_BY_DOXYGEN min @@ -55,14 +56,14 @@ min * * \sa min() */ -EIGEN_MAKE_CWISE_BINARY_OP(max,internal::scalar_max_op) +EIGEN_MAKE_CWISE_BINARY_OP(max,max) /** \returns an expression of the coefficient-wise max of \c *this and scalar \a other * * \sa min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > #ifdef EIGEN_PARSED_BY_DOXYGEN max @@ -81,27 +82,38 @@ max * Example: \include Cwise_array_power_array.cpp * Output: \verbinclude Cwise_array_power_array.out */ -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -const CwiseBinaryOp, const Derived, const ExponentDerived> -pow(const ArrayBase& exponents) const -{ - return CwiseBinaryOp, const Derived, const ExponentDerived>( - this->derived(), - exponents.derived() - ); -} +EIGEN_MAKE_CWISE_BINARY_OP(pow,pow) + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow) +#else +/** \returns an expression of the coefficients of \c *this raised to the constant power \a exponent + * + * \tparam T is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression.
+ * + * This function computes the coefficient-wise power. The function MatrixBase::pow() in the + * unsupported module MatrixFunctions computes the matrix power. + * + * Example: \include Cwise_pow.cpp + * Output: \verbinclude Cwise_pow.out + * + * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log() + */ +template +const CwiseBinaryOp,Derived,Constant > pow(const T& exponent) const; +#endif + // TODO code generating macros could be moved to Macros.h and could include generation of documentation #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \ template \ -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ }\ -typedef CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \ -typedef CwiseBinaryOp, const CwiseNullaryOp, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \ +typedef CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \ +typedef CwiseBinaryOp, const CwiseNullaryOp, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \ OP(const Scalar& s) const { \ return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \ @@ -113,10 +125,10 @@ OP(const Scalar& s, const Derived& d) { \ #define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \ template \ -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const OtherDerived, const Derived> \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const OtherDerived, const Derived> \ OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ { \ - return CwiseBinaryOp, const OtherDerived, const Derived>(other.derived(), derived()); \ + return CwiseBinaryOp, const OtherDerived, const Derived>(other.derived(), derived()); \ } \ EIGEN_DEVICE_FUNC \ inline const RCmp ## RCOMPARATOR ## ReturnType \ @@ -199,48 +211,63 @@ EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ) #undef EIGEN_MAKE_CWISE_COMP_R_OP // scalar addition - +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum) +#else /** \returns an expression of \c *this with each coeff incremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. * * Example: \include Cwise_plus.cpp * Output: \verbinclude Cwise_plus.out * * \sa operator+=(), operator-() */ -EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp, const Derived> -operator+(const Scalar& scalar) const -{ - return CwiseUnaryOp, const Derived>(derived(), internal::scalar_add_op(scalar)); -} - -EIGEN_DEVICE_FUNC -friend inline const CwiseUnaryOp, const Derived> -operator+(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS& other) -{ - return other + scalar; -} +template +const CwiseBinaryOp,Derived,Constant > operator+(const T& scalar) const; +/** \returns an expression of \a expr with each coeff incremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
+ */ +template friend +const CwiseBinaryOp,Constant,Derived> operator+(const T& scalar, const StorageBaseType& expr); +#endif +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference) +#else /** \returns an expression of \c *this with each coeff decremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. * * Example: \include Cwise_minus.cpp * Output: \verbinclude Cwise_minus.out * - * \sa operator+(), operator-=() + * \sa operator+=(), operator-() */ -EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp, const Derived> -operator-(const Scalar& scalar) const -{ - return CwiseUnaryOp, const Derived>(derived(), internal::scalar_sub_op(scalar));; -} +template +const CwiseBinaryOp,Derived,Constant > operator-(const T& scalar) const; +/** \returns an expression of the constant matrix of value \a scalar decremented by the coefficients of \a expr + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator-(const T& scalar, const StorageBaseType& expr); +#endif -EIGEN_DEVICE_FUNC -friend inline const CwiseUnaryOp, const Derived> -operator-(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS& other) -{ - return CwiseUnaryOp, const Derived>(other.derived(), internal::scalar_rsub_op(scalar));; -} + +#ifndef EIGEN_PARSED_BY_DOXYGEN + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient) +#else + /** + * \brief Component-wise division of the scalar \a s by array elements of \a a. + * + * \tparam T is the scalar type of \a s. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
+ */ + template friend + inline const CwiseBinaryOp,Constant,Derived> + operator/(const T& s,const StorageBaseType& a); +#endif /** \returns an expression of the coefficient-wise && operator of *this and \a other * diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 775fa6ee0..9e42bb540 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -26,7 +26,6 @@ typedef CwiseUnaryOp, const Derived> LgammaRe typedef CwiseUnaryOp, const Derived> DigammaReturnType; typedef CwiseUnaryOp, const Derived> ErfReturnType; typedef CwiseUnaryOp, const Derived> ErfcReturnType; -typedef CwiseUnaryOp, const Derived> PowReturnType; typedef CwiseUnaryOp, const Derived> SquareReturnType; typedef CwiseUnaryOp, const Derived> CubeReturnType; typedef CwiseUnaryOp, const Derived> RoundReturnType; @@ -248,6 +247,7 @@ tan() const * * \sa tan(), asin(), acos() */ +EIGEN_DEVICE_FUNC inline const AtanReturnType atan() const { @@ -289,6 +289,7 @@ asin() const * * \sa tan(), sinh(), cosh() */ +EIGEN_DEVICE_FUNC inline const TanhReturnType tanh() const { @@ -302,6 +303,7 @@ tanh() const * * \sa sin(), tanh(), cosh() */ +EIGEN_DEVICE_FUNC inline const SinhReturnType sinh() const { @@ -315,6 +317,7 @@ sinh() const * * \sa tan(), sinh(), cosh() */ +EIGEN_DEVICE_FUNC inline const CoshReturnType cosh() const { @@ -332,6 +335,7 @@ cosh() const * * \sa digamma() */ +EIGEN_DEVICE_FUNC inline const LgammaReturnType lgamma() const { @@ -346,6 +350,7 @@ lgamma() const * * \sa Eigen::digamma(), Eigen::polygamma(), lgamma() */ +EIGEN_DEVICE_FUNC inline const DigammaReturnType digamma() const { @@ -364,6 +369,7 @@ digamma() const * * \sa erfc() */ +EIGEN_DEVICE_FUNC inline const ErfReturnType erf() const { @@ -382,30 +388,13 @@ erf() const * * \sa erf() */ +EIGEN_DEVICE_FUNC inline const ErfcReturnType erfc() const { return ErfcReturnType(derived()); } -/** \returns an expression of the coefficient-wise power of *this to the given exponent. - * - * This function computes the coefficient-wise power. The function MatrixBase::pow() in the - * unsupported module MatrixFunctions computes the matrix power. - * - * Example: \include Cwise_pow.cpp - * Output: \verbinclude Cwise_pow.out - * - * \sa exp(), log() - */ -EIGEN_DEVICE_FUNC -inline const PowReturnType -pow(const Scalar& exponent) const -{ - return PowReturnType(derived(), internal::scalar_pow_op(exponent)); -} - - /** \returns an expression of the coefficient-wise inverse of *this. 
* * Example: \include Cwise_inverse.cpp @@ -455,6 +444,7 @@ cube() const * * \sa ceil(), floor() */ +EIGEN_DEVICE_FUNC inline const RoundReturnType round() const { @@ -468,6 +458,7 @@ round() const * * \sa ceil(), round() */ +EIGEN_DEVICE_FUNC inline const FloorReturnType floor() const { @@ -481,6 +472,7 @@ floor() const * * \sa floor(), round() */ +EIGEN_DEVICE_FUNC inline const CeilReturnType ceil() const { @@ -494,6 +486,7 @@ ceil() const * * \sa isfinite(), isinf() */ +EIGEN_DEVICE_FUNC inline const IsNaNReturnType isNaN() const { @@ -507,6 +500,7 @@ isNaN() const * * \sa isnan(), isfinite() */ +EIGEN_DEVICE_FUNC inline const IsInfReturnType isInf() const { @@ -520,6 +514,7 @@ isInf() const * * \sa isnan(), isinf() */ +EIGEN_DEVICE_FUNC inline const IsFiniteReturnType isFinite() const { diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index a8fa287c9..b51ee9e4c 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2016 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -16,7 +16,7 @@ * * \sa class CwiseBinaryOp, operator-=() */ -EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op) +EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference) /** \returns an expression of the sum of \c *this and \a other * @@ -24,7 +24,7 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op) * * \sa class CwiseBinaryOp, operator+=() */ -EIGEN_MAKE_CWISE_BINARY_OP(operator+,internal::scalar_sum_op) +EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum) /** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other * @@ -45,3 +45,33 @@ binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other, const Cu return CwiseBinaryOp(derived(), other.derived(), func); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product) +#else +/** \returns an expression of \c *this scaled by the scalar factor \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template +const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; +/** \returns an expression of \a expr scaled by the scalar factor \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator*(const T& scalar, const StorageBaseType& expr); +#endif + + + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient) +#else +/** \returns an expression of \c *this divided by the scalar value \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
+ */ +template +const CwiseBinaryOp,Derived,Constant > operator/(const T& scalar) const; +#endif diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 67ec601b9..6cd5479a0 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -12,12 +12,6 @@ #ifndef EIGEN_PARSED_BY_DOXYGEN -/** \internal Represents a scalar multiple of an expression */ -typedef CwiseUnaryOp, const Derived> ScalarMultipleReturnType; -typedef CwiseUnaryOp >, const Derived> ScalarComplexMultipleReturnType; - -/** \internal Represents a quotient of an expression by a scalar*/ -typedef CwiseUnaryOp, const Derived> ScalarQuotient1ReturnType; /** \internal the return type of conjugate() */ typedef typename internal::conditional::IsComplex, const CwiseUnaryOp, const Derived>, @@ -39,7 +33,6 @@ typedef CwiseUnaryOp, const Derived> ImagReturn typedef CwiseUnaryView, Derived> NonConstImagReturnType; typedef CwiseUnaryOp, const Derived> NegativeReturnType; -//typedef CwiseUnaryOp, const Derived> #endif // not EIGEN_PARSED_BY_DOXYGEN @@ -50,71 +43,6 @@ inline const NegativeReturnType operator-() const { return NegativeReturnType(derived()); } -/** \returns an expression of \c *this scaled by the scalar factor \a scalar */ -EIGEN_DEVICE_FUNC -inline const ScalarMultipleReturnType -operator*(const Scalar& scalar) const -{ - return ScalarMultipleReturnType(derived(), internal::scalar_multiple_op(scalar)); -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN -const ScalarMultipleReturnType operator*(const RealScalar& scalar) const; -#endif - -/** \returns an expression of \c *this divided by the scalar value \a scalar */ -EIGEN_DEVICE_FUNC -inline const ScalarQuotient1ReturnType -operator/(const Scalar& scalar) const -{ - return ScalarQuotient1ReturnType(derived(), internal::scalar_quotient1_op(scalar)); -} - -/** Overloaded for efficiently multipling with compatible scalar types */ -template -EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type -operator*(const T& scalar) const -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - return CwiseUnaryOp, const Derived>( - derived(), internal::scalar_multiple2_op(scalar) ); -} - -EIGEN_DEVICE_FUNC -inline friend const ScalarMultipleReturnType -operator*(const Scalar& scalar, const StorageBaseType& matrix) -{ return matrix*scalar; } - -template -EIGEN_DEVICE_FUNC inline friend -typename internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type -operator*(const T& scalar, const StorageBaseType& matrix) -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - return CwiseUnaryOp, const Derived>( - matrix.derived(), internal::scalar_multiple2_op(scalar) ); -} - -template -EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type -operator/(const T& scalar) const -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - return CwiseUnaryOp, const Derived>( - derived(), internal::scalar_quotient2_op(scalar) ); -} - template struct CastXpr { typedef typename internal::cast_return_type, const Derived> >::type Type; }; /** \returns an expression of *this with the \a Scalar type casted to diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h index 6dd2e1192..f1084abef 100644 --- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h +++ 
b/Eigen/src/plugins/MatrixCwiseBinaryOps.h @@ -19,10 +19,10 @@ */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived) +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product) cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived()); + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived()); } /** \returns an expression of the coefficient-wise == operator of *this and \a other @@ -74,10 +74,10 @@ cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of *this and scalar \a other @@ -85,7 +85,7 @@ cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * \sa class CwiseBinaryOp, min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMin(const Scalar &other) const { return cwiseMin(Derived::Constant(rows(), cols(), other)); @@ -100,10 +100,10 @@ cwiseMin(const Scalar &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise max of *this and scalar \a other @@ -111,7 +111,7 @@ cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * \sa class CwiseBinaryOp, min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMax(const Scalar &other) const { return cwiseMax(Derived::Constant(rows(), cols(), other)); @@ -133,7 +133,7 @@ cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } -typedef CwiseBinaryOp, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType; +typedef CwiseBinaryOp, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType; /** \returns an expression of the coefficient-wise == operator of \c *this and a scalar \a s * @@ -148,5 +148,5 @@ EIGEN_DEVICE_FUNC inline const CwiseScalarEqualReturnType cwiseEqual(const Scalar& s) const { - return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op()); + return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op()); } diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index fb3e48e99..d00b4603a 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ 
b/bench/perf_monitoring/gemm/changesets.txt @@ -44,4 +44,8 @@ before-evaluators 7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) 7591:09a8e2186610 # 3.3-alpha1 7650:b0f3c8f43025 # help clang inlining +8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g. on Haswell CPUs) +8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes +8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path +8985:d935df21a082 # Remove the rotating kernel. diff --git a/blas/PackedTriangularMatrixVector.h index e9886d56f..0039536a8 100644 --- a/blas/PackedTriangularMatrixVector.h +++ b/blas/PackedTriangularMatrixVector.h @@ -18,7 +18,7 @@ struct packed_triangular_matrix_vector_product; template struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -47,7 +47,7 @@ struct packed_triangular_matrix_vector_product struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, diff --git a/doc/CustomizingEigen.dox index cb25f4ec9..607f86658 100644 --- a/doc/CustomizingEigen.dox +++ b/doc/CustomizingEigen.dox @@ -56,13 +56,13 @@ void makeFloor(const MatrixBase& other) { derived() = derived().cw template void makeCeil(const MatrixBase& other) { derived() = derived().cwiseMax(other.derived()); } -const CwiseUnaryOp, Derived> +const CwiseBinaryOp, const Derived, const ConstantReturnType> operator+(const Scalar& scalar) const -{ return CwiseUnaryOp, Derived>(derived(), internal::scalar_add_op(scalar)); } +{ return CwiseBinaryOp, const Derived, const ConstantReturnType>(derived(), Constant(rows(),cols(),scalar)); } -friend const CwiseUnaryOp, Derived> +friend const CwiseBinaryOp, const ConstantReturnType, Derived> operator+(const Scalar& scalar, const MatrixBase& mat) -{ return CwiseUnaryOp, Derived>(mat.derived(), internal::scalar_add_op(scalar)); } +{ return CwiseBinaryOp, const ConstantReturnType, Derived>(Constant(rows(),cols(),scalar), mat.derived()); } \endcode Then one can add the following declaration in the config.h or whatever prerequisites header file of their project: diff --git a/test/array.cpp index 39a7b856f..0416ec5d2 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -72,7 +72,7 @@ template void array(const ArrayType& m) VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.rowwise().sum().sum() - m1.sum()), m1.abs().sum()); if (!internal::isMuchSmallerThan(abs(m1.sum() - (m1+m2).sum()), m1.abs().sum(), test_precision())) VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; @@ -592,16 +592,178 @@ template void array_special_functions() ref << 0.644934066848, 0.394934066848,
0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927; CALL_SUBTEST( verify_component_wise(ref, ref); ); - if(sizeof(RealScalar)>=64) { -// CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); ); + if(sizeof(RealScalar)>=8) { // double + // Reason for commented line: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232 + // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); ); CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res, ref); ); } else { -// CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); ); + // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); ); CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res.head(8), ref.head(8)); ); } } #endif + +#if EIGEN_HAS_C99_MATH + { + // Inputs and ground truth generated with scipy via: + // a = np.logspace(-3, 3, 5) - 1e-3 + // b = np.logspace(-3, 3, 5) - 1e-3 + // x = np.linspace(-0.1, 1.1, 5) + // (full_a, full_b, full_x) = np.vectorize(lambda a, b, x: (a, b, x))(*np.ix_(a, b, x)) + // full_a = full_a.flatten().tolist() # same for full_b, full_x + // v = scipy.special.betainc(full_a, full_b, full_x).flatten().tolist() + // + // Note in Eigen, we call betainc with arguments in the order (x, a, b). + ArrayType a(125); + ArrayType b(125); + ArrayType x(125); + ArrayType v(125); + ArrayType res(125); + + a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999; + + b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, + 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, + 0.999, 
31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999; + + x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, + -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1; + + v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan, + 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan, + 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan, + 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, + nan, nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256, + 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001, + 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403, + 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999, + 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan, + 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, + nan, nan, 7.864342668429763e-23, 3.015969667594166e-10, + 0.0008598571564165444, nan, nan, 6.031987710123844e-08, + 0.5000000000000007, 0.9999999396801229, nan, nan, 0.9999999999999999, + 0.9999999999999999, 0.9999999999999999, nan, nan, nan, nan, nan, nan, + nan, 0.0, 7.029920380986636e-306, 2.2450728208591345e-101, nan, nan, + 0.0, 9.275871147869727e-302, 1.2232913026152827e-97, nan, nan, 0.0, + 3.0891393081932924e-252, 2.9303043666183996e-60, nan, nan, + 2.248913486879199e-196, 0.5000000000004947, 0.9999999999999999, nan; + + CALL_SUBTEST(res = betainc(a, b, x); + verify_component_wise(res, v);); + } + + // Test various properties of betainc + { + ArrayType m1 = ArrayType::Random(32); + ArrayType m2 = ArrayType::Random(32); + ArrayType m3 = ArrayType::Random(32); + ArrayType one = ArrayType::Constant(32, Scalar(1.0)); + const Scalar eps = std::numeric_limits::epsilon(); + ArrayType a = (m1 * 4.0).exp(); + ArrayType 
b = (m2 * 4.0).exp(); + ArrayType x = m3.abs(); + + // betainc(a, 1, x) == x**a + CALL_SUBTEST( + ArrayType test = betainc(a, one, x); + ArrayType expected = x.pow(a); + verify_component_wise(test, expected);); + + // betainc(1, b, x) == 1 - (1 - x)**b + CALL_SUBTEST( + ArrayType test = betainc(one, b, x); + ArrayType expected = one - (one - x).pow(b); + verify_component_wise(test, expected);); + + // betainc(a, b, x) == 1 - betainc(b, a, 1-x) + CALL_SUBTEST( + ArrayType test = betainc(a, b, x) + betainc(b, a, one - x); + ArrayType expected = one; + verify_component_wise(test, expected);); + + // betainc(a+1, b, x) = betainc(a, b, x) - x**a * (1 - x)**b / (a * beta(a, b)) + CALL_SUBTEST( + ArrayType num = x.pow(a) * (one - x).pow(b); + ArrayType denom = a * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp(); + // Add eps to rhs and lhs so that component-wise test doesn't result in + // nans when both outputs are zeros. + ArrayType expected = betainc(a, b, x) - num / denom + eps; + ArrayType test = betainc(a + one, b, x) + eps; + if (sizeof(Scalar) >= 8) { // double + verify_component_wise(test, expected); + } else { + // Reason for limited test: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232 + verify_component_wise(test.head(8), expected.head(8)); + }); + + // betainc(a, b+1, x) = betainc(a, b, x) + x**a * (1 - x)**b / (b * beta(a, b)) + CALL_SUBTEST( + // Add eps to rhs and lhs so that component-wise test doesn't result in + // nans when both outputs are zeros. + ArrayType num = x.pow(a) * (one - x).pow(b); + ArrayType denom = b * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp(); + ArrayType expected = betainc(a, b, x) + num / denom + eps; + ArrayType test = betainc(a, b + one, x) + eps; + verify_component_wise(test, expected);); + } +#endif } void test_array() @@ -645,7 +807,7 @@ void test_array() VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, int >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, float >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); - typedef CwiseUnaryOp, ArrayXd > Xpr; + typedef CwiseUnaryOp, ArrayXd > Xpr; VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp index 75e6a778f..97e03be83 100644 --- a/test/array_for_matrix.cpp +++ b/test/array_for_matrix.cpp @@ -45,7 +45,7 @@ template void array_for_matrix(const MatrixType& m) VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).squaredNorm()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; diff --git a/test/commainitializer.cpp b/test/commainitializer.cpp index 99102b966..86bdb040e 100644 --- a/test/commainitializer.cpp +++ b/test/commainitializer.cpp @@ -43,4 +43,27 @@ void test_commainitializer() 4, 5, 6, vec[2].transpose(); VERIFY_IS_APPROX(m3, ref); + + + // Check with empty matrices (bug #1242) + { + int const M = 0; + int const N1 = 2; + int const N2 = 1; + + { + Matrix A1; + Matrix A2; + Matrix B; + B << A1, A2; + } + { + Matrix A1; + Matrix A2; + 
Matrix B; + B << A1, + A2; + } + } + } diff --git a/test/eigensolver_generalized_real.cpp b/test/eigensolver_generalized_real.cpp index a46a2e50e..da14482de 100644 --- a/test/eigensolver_generalized_real.cpp +++ b/test/eigensolver_generalized_real.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2012 Gael Guennebaud +// Copyright (C) 2012-2016 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -10,6 +10,7 @@ #include "main.h" #include #include +#include template void generalized_eigensolver_real(const MatrixType& m) { @@ -21,6 +22,7 @@ template void generalized_eigensolver_real(const MatrixType Index cols = m.cols(); typedef typename MatrixType::Scalar Scalar; + typedef std::complex ComplexScalar; typedef Matrix VectorType; MatrixType a = MatrixType::Random(rows,cols); @@ -31,14 +33,28 @@ template void generalized_eigensolver_real(const MatrixType MatrixType spdB = b.adjoint() * b + b1.adjoint() * b1; // lets compare to GeneralizedSelfAdjointEigenSolver - GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); - GeneralizedEigenSolver eig(spdA, spdB); + { + GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); + GeneralizedEigenSolver eig(spdA, spdB); - VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); + VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); - VectorType realEigenvalues = eig.eigenvalues().real(); - std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); - VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + VectorType realEigenvalues = eig.eigenvalues().real(); + std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); + VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + } + + // non symmetric case: + { + GeneralizedEigenSolver eig(a,b); + for(Index k=0; k tmp = (eig.betas()(k)*a).template cast() - eig.alphas()(k)*b; + if(tmp.norm()>(std::numeric_limits::min)()) + tmp /= tmp.norm(); + VERIFY_IS_MUCH_SMALLER_THAN( std::abs(tmp.determinant()), Scalar(1) ); + } + } // regression test for bug 1098 { @@ -57,7 +73,7 @@ void test_eigensolver_generalized_real() s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) ); - // some trivial but implementation-wise tricky cases + // some trivial but implementation-wise special cases CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) ); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) ); CALL_SUBTEST_3( generalized_eigensolver_real(Matrix()) ); diff --git a/test/evaluators.cpp b/test/evaluators.cpp index 876dffe22..aed5a05a7 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -21,7 +21,7 @@ namespace Eigen { EIGEN_STRONG_INLINE DstXprType& copy_using_evaluator(const EigenBase &dst, const SrcXprType &src) { - call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); return dst.const_cast_derived(); } @@ -29,7 +29,7 @@ namespace Eigen { EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const NoAlias& dst, const SrcXprType &src) { - call_assignment(dst, src.derived(), internal::assign_op()); + call_assignment(dst, src.derived(), internal::assign_op()); return dst.expression(); } @@ -45,7 +45,7 @@ namespace Eigen { dst.const_cast_derived().resizeLike(src.derived()); 
#endif - call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); return dst.const_cast_derived(); } @@ -53,28 +53,28 @@ namespace Eigen { void add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); + call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); } template void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); + call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); } template void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); } template void divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); } template diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp index 2bdb4b7f2..ba3378aab 100644 --- a/test/geo_alignedbox.cpp +++ b/test/geo_alignedbox.cpp @@ -48,6 +48,8 @@ template void alignedbox(const BoxType& _box) b0.extend(p0); b0.extend(p1); VERIFY(b0.contains(p0*s1+(Scalar(1)-s1)*p1)); + VERIFY(b0.contains(b0.center())); + VERIFY(b0.center()==(p0+p1)/Scalar(2)); (b2 = b0).extend(b1); VERIFY(b2.contains(b0)); diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp index bf63c69ec..305794cdf 100644 --- a/test/geo_homogeneous.cpp +++ b/test/geo_homogeneous.cpp @@ -58,6 +58,8 @@ template void homogeneous(void) T2MatrixType t2 = T2MatrixType::Random(); VERIFY_IS_APPROX(t2 * (v0.homogeneous().eval()), t2 * v0.homogeneous()); VERIFY_IS_APPROX(t2 * (m0.colwise().homogeneous().eval()), t2 * m0.colwise().homogeneous()); + VERIFY_IS_APPROX(t2 * (v0.homogeneous().asDiagonal()), t2 * hv0.asDiagonal()); + VERIFY_IS_APPROX((v0.homogeneous().asDiagonal()) * t2, hv0.asDiagonal() * t2); VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2, v0.transpose().rowwise().homogeneous() * t2); diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index e7f4b3dc5..17474af10 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -9,7 +9,7 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
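// [Editorial sketch, not part of the patch] The hunk below replaces the old
// EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN hook with EIGEN_SCALAR_BINARY_OP_PLUGIN.
// Eigen expands this macro inside its scalar/expression operator helpers, where
// (assuming that expansion context) LhsScalar and RhsScalar name the two operand
// scalar types, so the test can record whether a mixed-type expression really
// took the optimized scalar path instead of materializing a constant matrix:
// \code
//   g_called = false;
//   VERIFY_IS_APPROX(s + m1.array(), Scalar(s) + m1.array()); // real + matrix
//   VERIFY(g_called); // set only if an op with LhsScalar != RhsScalar was seen
// \endcode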
static bool g_called; -#define EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN { g_called = true; } +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } #include "main.h" @@ -93,6 +93,22 @@ template void real_complex(DenseIndex rows = MatrixType::Ro g_called = false; VERIFY_IS_APPROX(m1/s, m1/Scalar(s)); VERIFY(g_called && "matrix / real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s+m1.array(), Scalar(s)+m1.array()); + VERIFY(g_called && "real + matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()+s, m1.array()+Scalar(s)); + VERIFY(g_called && "matrix + real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s-m1.array(), Scalar(s)-m1.array()); + VERIFY(g_called && "real - matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()-s, m1.array()-Scalar(s)); + VERIFY(g_called && "matrix - real not properly optimized"); } void test_linearstructure() diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index dbcf468ea..57ef85c32 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -23,10 +23,18 @@ #endif +static bool g_called; +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } + #include "main.h" using namespace std; +#define VERIFY_MIX_SCALAR(XPR,REF) \ + g_called = false; \ + VERIFY_IS_APPROX(XPR,REF); \ + VERIFY( g_called && #XPR" not properly optimized"); + template void mixingtypes(int size = SizeAtCompileType) { typedef std::complex CF; @@ -42,6 +50,7 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_f mf = Mat_f::Random(size,size); Mat_d md = mf.template cast(); + //Mat_d rd = md; Mat_cf mcf = Mat_cf::Random(size,size); Mat_cd mcd = mcf.template cast >(); Mat_cd rcd = mcd; @@ -56,23 +65,50 @@ template void mixingtypes(int size = SizeAtCompileType) mf+mf; - VERIFY_RAISES_ASSERT(mf+md); -#if !EIGEN_HAS_STD_RESULT_OF - // this one does not even compile with C++11 - VERIFY_RAISES_ASSERT(mf+mcf); -#endif + +// VERIFY_RAISES_ASSERT(mf+md); // does not even compile #ifdef EIGEN_DONT_VECTORIZE VERIFY_RAISES_ASSERT(vf=vd); VERIFY_RAISES_ASSERT(vf+=vd); - VERIFY_RAISES_ASSERT(mcd=md); #endif // check scalar products - VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); - VERIFY_IS_APPROX(sd * vcd, complex(sd) * vcd); - VERIFY_IS_APPROX(vf * scf , vf.template cast >() * scf); - VERIFY_IS_APPROX(scd * vd, scd * vd.template cast >()); + VERIFY_MIX_SCALAR(vcf * sf , vcf * complex(sf)); + VERIFY_MIX_SCALAR(sd * vcd , complex(sd) * vcd); + VERIFY_MIX_SCALAR(vf * scf , vf.template cast >() * scf); + VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast >()); + + VERIFY_MIX_SCALAR(vcf * 2 , vcf * complex(2)); + VERIFY_MIX_SCALAR(vcf * 2.1 , vcf * complex(2.1)); + VERIFY_MIX_SCALAR(2 * vcf, vcf * complex(2)); + VERIFY_MIX_SCALAR(2.1 * vcf , vcf * complex(2.1)); + + // check scalar quotients + VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); + VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); + VERIFY_MIX_SCALAR(vf.array() / scf, vf.template cast >().array() / scf); + VERIFY_MIX_SCALAR(scd / vd.array() , scd / vd.template cast >().array()); + + // check scalar increment + VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex(sf)); + VERIFY_MIX_SCALAR(sd + vcd.array(), complex(sd) + vcd.array()); + VERIFY_MIX_SCALAR(vf.array() + scf, vf.template cast >().array() + scf); + VERIFY_MIX_SCALAR(scd + vd.array() , scd + vd.template cast >().array()); + + // check scalar subtractions + VERIFY_MIX_SCALAR(vcf.array() - sf , 
vcf.array() - complex(sf)); + VERIFY_MIX_SCALAR(sd - vcd.array(), complex(sd) - vcd.array()); + VERIFY_MIX_SCALAR(vf.array() - scf, vf.template cast >().array() - scf); + VERIFY_MIX_SCALAR(scd - vd.array() , scd - vd.template cast >().array()); + + // check scalar powers + VERIFY_MIX_SCALAR( pow(vcf.array(), sf), pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( vcf.array().pow(sf) , pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( pow(sd, vcd.array()), pow(complex(sd), vcd.array()) ); + VERIFY_MIX_SCALAR( pow(vf.array(), scf), pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( vf.array().pow(scf) , pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( pow(scd, vd.array()), pow(scd, vd.template cast >().array()) ); // check dot product vf.dot(vf); @@ -186,16 +222,50 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_cd((scd * md.template cast().eval() * mcd).template triangularView())); - VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); - VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); -// VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); + VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); + VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() + mcd.array(), md.template cast().eval().array() + mcd.array() ); + VERIFY_IS_APPROX( mcd.array() + md.array(), mcd.array() + md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast().eval().array() - mcd.array() ); + VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); + VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast().eval().array()) ); + + VERIFY_IS_APPROX( pow(md.array(),mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( pow(mcd.array(),md.array()), mcd.array().pow(md.template cast().eval().array()) ); + + rcd = mcd; + VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd += md, mcd + md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd -= md, mcd - md.template cast().eval() ); rcd = mcd; VERIFY_IS_APPROX( rcd.array() *= md.array(), mcd.array() * md.template cast().eval().array() ); rcd = mcd; VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast().eval().array() ); + + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + + VERIFY_IS_APPROX( rcd.noalias() = md*md, ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= md*md, mcd - ((md*md).eval().template cast()) ); + + VERIFY_IS_APPROX( rcd.noalias() = mcd + md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += mcd + md*md, mcd + mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= mcd + 
md*md, - ((md*md).eval().template cast()) ); } void test_mixingtypes() diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp index 2f5025305..a419b0e44 100644 --- a/test/nesting_ops.cpp +++ b/test/nesting_ops.cpp @@ -75,8 +75,8 @@ template void run_nesting_ops_2(const MatrixType& _m) } else { - VERIFY( verify_eval_type<1>(2*m1, 2*m1) ); - VERIFY( verify_eval_type<2>(2*m1, m1) ); + VERIFY( verify_eval_type<2>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<3>(2*m1, m1) ); } VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); VERIFY( verify_eval_type<3>(m1+m1, m1) ); diff --git a/test/real_qz.cpp b/test/real_qz.cpp index a1766c6d9..45ae8d763 100644 --- a/test/real_qz.cpp +++ b/test/real_qz.cpp @@ -49,11 +49,20 @@ template void real_qz(const MatrixType& m) for (Index i=0; i } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> pow(Scalar exponent) const { - return unaryExpr(internal::scalar_pow_op(exponent)); + return unaryExpr(internal::bind2nd_op >(exponent)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator+ (Scalar rhs) const { - return unaryExpr(internal::scalar_add_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, const Derived> + const TensorCwiseUnaryOp >, const Derived> operator+ (Scalar lhs, const Derived& rhs) { - return rhs + lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator- (Scalar rhs) const { EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return unaryExpr(internal::scalar_sub_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, - const TensorCwiseUnaryOp, const Derived> > + const TensorCwiseUnaryOp >, const Derived> operator- (Scalar lhs, const Derived& rhs) { - return -rhs + lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator* (Scalar rhs) const { - return unaryExpr(internal::scalar_multiple_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, const Derived> + const TensorCwiseUnaryOp >, const Derived> operator* (Scalar lhs, const Derived& rhs) { - return rhs * lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator/ (Scalar rhs) const { - return unaryExpr(internal::scalar_quotient1_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, - const TensorCwiseUnaryOp, const Derived> > + const TensorCwiseUnaryOp >, const Derived> operator/ (Scalar lhs, const Derived& rhs) { - return rhs.inverse() * lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC @@ -307,7 +305,6 @@ class TensorBase return unaryExpr(internal::scalar_floor_op()); } - // Generic binary operation support. 
template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp @@ -372,66 +369,66 @@ class TensorBase // Comparisons and tests. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator!=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } // comparisons and tests for Scalars EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<(Scalar threshold) const { return operator<(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<=(Scalar threshold) const { return operator<=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>(Scalar threshold) const { return operator>(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>=(Scalar threshold) const { return operator>=(constant(threshold)); } 
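// [Editorial note, illustration only] Each scalar comparison overload in this
// block broadcasts the threshold via constant() and reuses the tensor-tensor
// operator defined above, so for a float tensor `t`,
// \code
//   auto mask = t < 0.5f;             // scalar overload
//   auto same = t < t.constant(0.5f); // what it expands to
// \endcode
// both build a TensorCwiseBinaryOp of the new scalar_cmp_op functor whose
// right-hand side is a TensorCwiseNullaryOp constant expression.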
EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator==(Scalar threshold) const { return operator==(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator!=(Scalar threshold) const { return operator!=(constant(threshold)); } @@ -487,15 +484,22 @@ class TensorBase typedef TensorScanOp, const Derived> TensorScanSumOp; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorScanSumOp - cumsum(const Index& axis) const { - return TensorScanSumOp(derived(), axis); + cumsum(const Index& axis, bool exclusive = false) const { + return TensorScanSumOp(derived(), axis, exclusive); } typedef TensorScanOp, const Derived> TensorScanProdOp; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorScanProdOp - cumprod(const Index& axis) const { - return TensorScanProdOp(derived(), axis); + cumprod(const Index& axis, bool exclusive = false) const { + return TensorScanProdOp(derived(), axis, exclusive); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanOp + scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const { + return TensorScanOp(derived(), axis, exclusive, reducer); } // Reductions. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index a60a17049..ee16cde9b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -202,7 +202,7 @@ struct TensorEvaluator::numThreads( + int num_threads = TensorCostModel::numThreads( static_cast(n) * m, cost, this->m_device.numThreads()); // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost @@ -301,7 +301,7 @@ struct TensorEvaluator(scratchpad()) + kCudaScratchSize; + semaphore_ = reinterpret_cast(scratch); + cudaError_t err = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + return semaphore_; + } + private: const cudaStream_t* stream_; int device_; mutable void* scratch_; + mutable unsigned int* semaphore_; }; struct GpuDevice { @@ -174,6 +194,15 @@ struct GpuDevice { #endif } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const { +#ifndef __CUDA_ARCH__ + return stream_->semaphore(); +#else + eigen_assert(false && "The default device should be used instead to generate kernel code"); + return NULL; +#endif + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 9073c611a..0af91fe64 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -106,7 +106,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { // Build a thread pool device on top of an existing pool of threads. struct ThreadPoolDevice { // The ownership of the thread pool remains with the caller.
- ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { return internal::aligned_malloc(num_bytes); @@ -130,7 +130,7 @@ struct ThreadPoolDevice { ::memset(buffer, c, n); } - EIGEN_STRONG_INLINE size_t numThreads() const { + EIGEN_STRONG_INLINE int numThreads() const { return num_threads_; } @@ -186,7 +186,7 @@ struct ThreadPoolDevice { std::function f) const { typedef TensorCostModel CostModel; if (n <= 1 || numThreads() == 1 || - CostModel::numThreads(n, cost, numThreads()) == 1) { + CostModel::numThreads(n, cost, static_cast(numThreads())) == 1) { f(0, n); return; } @@ -246,7 +246,7 @@ struct ThreadPoolDevice { // Recursively divide size into halves until we reach block_size. // Division code rounds mid to block_size, so we are guaranteed to get // block_count leaves that do actual computations. - Barrier barrier(block_count); + Barrier barrier(static_cast(block_count)); std::function handleRange; handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) { if (last - first <= block_size) { @@ -272,7 +272,7 @@ struct ThreadPoolDevice { private: ThreadPoolInterface* pool_; - size_t num_threads_; + int num_threads_; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 31b361c83..a48cb1daa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -403,6 +403,101 @@ struct TensorEvaluator m_rightImpl; }; +// -------------------- CwiseTernaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseTernaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_arg1Impl(op.arg1Expression(), device), + m_arg2Impl(op.arg2Expression(), device), + m_arg3Impl(op.arg3Expression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + + eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = 
internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use arg2 or arg3 dimensions if they are known at compile time. + return m_arg1Impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_arg1Impl.evalSubExprsIfNeeded(NULL); + m_arg2Impl.evalSubExprsIfNeeded(NULL); + m_arg3Impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_arg1Impl.cleanup(); + m_arg2Impl.cleanup(); + m_arg3Impl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_arg1Impl.costPerCoeff(vectorized) + + m_arg2Impl.costPerCoeff(vectorized) + + m_arg3Impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + private: + const TernaryOp m_functor; + TensorEvaluator m_arg1Impl; + TensorEvaluator m_arg2Impl; + TensorEvaluator m_arg3Impl; +}; + // -------------------- SelectOp -------------------- diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h index ea250d8bc..5f2e329f2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -218,6 +218,86 @@ class TensorCwiseBinaryOp : public TensorBase +struct traits > +{ + // Type promotion to handle the case where the types of the args are different. 
+ typedef typename result_of< + TernaryOp(typename Arg1XprType::Scalar, + typename Arg2XprType::Scalar, + typename Arg3XprType::Scalar)>::type Scalar; + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename Arg1XprType::Nested Arg1Nested; + typedef typename Arg2XprType::Nested Arg2Nested; + typedef typename Arg3XprType::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseTernaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseTernaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseTernaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp()) + : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg1Expression() const { return m_arg1_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg2Expression() const { return m_arg2_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg3Expression() const { return m_arg3_xpr; } + + protected: + typename Arg1XprType::Nested m_arg1_xpr; + typename Arg2XprType::Nested m_arg2_xpr; + typename Arg3XprType::Nested m_arg3_xpr; + const TernaryOp m_functor; +}; + + namespace internal { template struct traits > diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index a1a18d938..f35275ffb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -21,6 +21,7 @@ template: template class TensorCwiseNullaryOp; template class TensorCwiseUnaryOp; template class TensorCwiseBinaryOp; +template class TensorCwiseTernaryOp; template class TensorSelectOp; template class TensorReductionOp; template class TensorIndexTupleOp; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 3dd32e9d1..a8e48fced 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -84,6 +84,14 @@ struct functor_traits > { }; + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + // Standard reduction functors template struct SumReducer { @@ -119,6 +127,15 @@ template struct SumReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd + }; +};
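// [Editorial sketch, not part of the patch] The reducer_traits pattern
// introduced above lets reduction code query cost and vectorizability per
// reducer and per device at compile time, e.g.:
// \code
//   typedef internal::SumReducer<float> R;
//   const int  cost = internal::reducer_traits<R, DefaultDevice>::Cost;
//   const bool vec  = internal::reducer_traits<R, DefaultDevice>::PacketAccess;
// \endcode
// Reducers without an explicit specialization fall back to the primary
// template above (Cost = 1, PacketAccess = false) and thus keep compiling,
// simply taking the scalar code path.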
+ + template struct MeanReducer { static const bool PacketAccess = packet_traits::HasAdd && !NumTraits::IsInteger; @@ -162,6 +179,15 @@ template struct MeanReducer DenseIndex packetCount_; }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd + }; +}; + + template struct MaxReducer { static const bool PacketAccess = packet_traits::HasMax; @@ -195,6 +221,15 @@ template struct MaxReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMax + }; +}; + + template struct MinReducer { static const bool PacketAccess = packet_traits::HasMin; @@ -228,6 +263,14 @@ template struct MinReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMin + }; +}; + template struct ProdReducer { @@ -263,6 +306,14 @@ template struct ProdReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = PacketType::HasMul + }; +}; + struct AndReducer { @@ -280,6 +331,15 @@ struct AndReducer } }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + struct OrReducer { static const bool PacketAccess = false; static const bool IsStateful = false; @@ -295,6 +355,15 @@ struct OrReducer { } }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + // Argmin/Argmax reducers template struct ArgMaxTupleReducer { @@ -312,6 +381,15 @@ template struct ArgMaxTupleReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + + template struct ArgMinTupleReducer { static const bool PacketAccess = false; @@ -328,6 +406,14 @@ template struct ArgMinTupleReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + // Random number generation namespace { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h new file mode 100644 index 000000000..665b861cf --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H + +namespace Eigen { + +/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors. + * + * This function computes the regularized incomplete beta function (integral). 
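+ *
+ * A usage sketch (the tensor shapes and constants below are illustrative
+ * only, not part of this patch):
+ * \code
+ * Eigen::Tensor<float, 2> a(3, 3), b(3, 3), x(3, 3);
+ * a.setConstant(2.0f); b.setConstant(3.0f); x.setConstant(0.5f);
+ * Eigen::Tensor<float, 2> r = Eigen::betainc(a, b, x); // I_x(a, b), coefficient-wise
+ * \endcode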
+ * + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const + TensorCwiseTernaryOp, + const ADerived, const BDerived, const XDerived> + betainc(const ADerived& a, const BDerived& b, const XDerived& x) { + return TensorCwiseTernaryOp< + internal::scalar_betainc_op, const ADerived, + const BDerived, const XDerived>( + a, b, x, internal::scalar_betainc_op()); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 38a833f82..f3a3a1b88 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -17,34 +17,62 @@ template<> struct significant_decimals_impl : significant_decimals_default_impl {}; -} +// Print the tensor as a 2d matrix +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); + static const int layout = Tensor::Layout; + Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); + os << matrix; + } + } +}; + + +// Print the tensor as a vector +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + Map > array(const_cast(tensor.data()), total_size); + os << array; + } + } +}; + + +// Print the tensor as a scalar +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + os << tensor.coeff(0); + } +}; +} + template std::ostream& operator << (std::ostream& os, const TensorBase& expr) { + typedef TensorEvaluator, DefaultDevice> Evaluator; + typedef typename Evaluator::Dimensions Dimensions; + // Evaluate the expression if needed TensorForcedEvalOp eval = expr.eval(); - TensorEvaluator, DefaultDevice> tensor(eval, DefaultDevice()); + Evaluator tensor(eval, DefaultDevice()); tensor.evalSubExprsIfNeeded(NULL); - typedef typename internal::remove_const::type Scalar; - typedef typename T::Index Index; - typedef typename TensorEvaluator, DefaultDevice>::Dimensions Dimensions; - const Index total_size = internal::array_prod(tensor.dimensions()); - - // Print the tensor as a 1d vector or a 2d matrix. + // Print the result static const int rank = internal::array_size::value; - if (rank == 0) { - os << tensor.coeff(0); - } else if (rank == 1) { - Map > array(const_cast(tensor.data()), total_size); - os << array; - } else { - const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); - static const int layout = TensorEvaluator, DefaultDevice>::Layout; - Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); - os << matrix; - } + internal::TensorPrinter::run(os, tensor); // Cleanup. 
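// Rank dispatch implemented by the TensorPrinter specializations above, as a
// worked example mirroring the new tests (literal values are illustrative):
//   Eigen::Tensor<int, 0> t0; t0() = 123;
//   std::cout << t0;                  // rank 0: prints "123"
//   Eigen::Tensor<int, 1> t1(5);      // rank 1: printed as a column vector
//   Eigen::Tensor<int, 3> t3(2, 3, 4); // rank >= 2: flattened to a 2x12 matrix,
//                                      // i.e. first_dim x (total_size / first_dim)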
tensor.cleanup(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index b1645d56f..fdb5ee6b8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -47,22 +47,39 @@ template <> struct max_n_1<0> { // Default packet types template -struct PacketType { +struct PacketType : internal::packet_traits { typedef typename internal::packet_traits::type type; - enum { size = internal::unpacket_traits::size }; }; // For CUDA packet types when using a GpuDevice -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16) template <> -struct PacketType { - typedef float4 type; - static const int size = 4; -}; -template <> -struct PacketType { - typedef double2 type; +struct PacketType { + typedef half2 type; static const int size = 2; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0, + + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasLog = 1, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 1, + }; }; #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index 52cfc2824..d34f1e328 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -148,7 +148,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast(m_impl.data()); } - const TensorEvaluator& impl() const { return m_impl; } + EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } protected: TensorEvaluator m_impl; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 99a09c058..04ba45a8f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -316,7 +316,7 @@ struct OuterReducer { #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template -__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); +__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); #ifdef EIGEN_HAS_CUDA_FP16 @@ -558,7 +558,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + PacketSize - 1 < dimensions().TotalSize()); + eigen_assert(index + PacketSize - 1 < internal::array_prod(dimensions())); EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; if (ReducingInnerMostDims) { @@ -616,7 +616,7 @@ struct TensorEvaluator, Device> template friend struct internal::FullReducerShard; #endif #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); + template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); #ifdef EIGEN_HAS_CUDA_FP16 template friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); template friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 45087a9a4..d9bbcd858 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -112,17 +112,42 @@ __global__ void ReductionInitKernel(const CoeffType val, Index num_preserved_coe } } + template __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, - typename Self::CoeffReturnType* output) { + typename Self::CoeffReturnType* output, unsigned int* semaphore) { + // Initialize the output value const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; - - // Initialize the output value if it wasn't initialized by the ReductionInitKernel - if (gridDim.x == 1 && first_index == 0) { - *output = reducer.initialize(); - __syncthreads(); + if (gridDim.x == 1) { + if (first_index == 0) { + *output = reducer.initialize(); + } } + else { + if (threadIdx.x == 0) { + unsigned int block = atomicCAS(semaphore, 0u, 1u); + if (block == 0) { + // We're the first block to run, initialize the output value + atomicExch(output, reducer.initialize()); + __threadfence(); + atomicExch(semaphore, 2u); + } + else { + // Wait for the first block to initialize the output value. + // Use atomicCAS here to ensure that the reads aren't cached + unsigned int val; + do { + val = atomicCAS(semaphore, 2u, 2u); + } + while (val < 2u); + } + } + } + + __syncthreads(); + + eigen_assert(gridDim.x == 1 || *semaphore >= 2u); typename Self::CoeffReturnType accum = reducer.initialize(); Index max_iter = numext::mini(num_coeffs - first_index, NumPerThread*BlockSize); @@ -141,6 +166,11 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num if ((threadIdx.x & (warpSize - 1)) == 0) { atomicReduce(output, accum, reducer); } + + if (gridDim.x > 1 && threadIdx.x == 0) { + // Let the last block reset the semaphore + atomicInc(semaphore, gridDim.x + 1); + } } @@ -246,15 +276,13 @@ struct FullReductionLauncher { const int num_per_thread = 128; const int num_blocks = divup(num_coeffs, block_size * num_per_thread); + unsigned int* semaphore = NULL; if (num_blocks > 1) { - // We initialize the outputs outside the reduction kernel when we can't be sure that there - // won't be a race conditions between multiple thread blocks. - LAUNCH_CUDA_KERNEL((ReductionInitKernel), - 1, 32, 0, device, reducer.initialize(), 1, output); + semaphore = device.semaphore(); } LAUNCH_CUDA_KERNEL((FullReductionKernel), - num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); + num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore); } }; @@ -300,10 +328,10 @@ struct FullReducer { // Unfortunately nvidia doesn't support well exotic types such as complex, // so reduce the scope of the optimized version of the code to the simple case // of floats and half floats. 
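// The semaphore handshake added to FullReductionKernel above implements a
// grid-wide "first block initializes the output, all other blocks wait"
// protocol, replacing the separate ReductionInitKernel launch. A hedged
// host-side sketch of the same idea using std::atomic (illustrative only;
// the kernel relies on atomicCAS/atomicExch plus __threadfence instead):
//
//   std::atomic<unsigned int> semaphore{0};
//   unsigned int expected = 0;
//   if (semaphore.compare_exchange_strong(expected, 1u)) {
//     *output = initial_value;   // we won the race: initialize the output
//     semaphore.store(2u);       // publish the initialization
//   } else {
//     while (semaphore.load() < 2u) { }   // spin until it is published
//   }
//   // All workers then accumulate into *output; the last one wraps the
//   // semaphore back to 0 (via atomicInc above) so it can be reused.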
- #ifdef EIGEN_HAS_CUDA_FP16 +#ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || - (internal::is_same::value && Op::PacketAccess)); + (internal::is_same::value && reducer_traits::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; @@ -318,7 +346,7 @@ struct FullReducer { return; } - FullReductionLauncher::run(self, reducer, device, output, num_coeffs); + FullReductionLauncher::PacketAccess>::run(self, reducer, device, output, num_coeffs); } }; @@ -580,7 +608,7 @@ struct InnerReducer { #ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || - (internal::is_same::value && Op::PacketAccess)); + (internal::is_same::value && reducer_traits::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; @@ -599,7 +627,7 @@ struct InnerReducer { return true; } - return InnerReductionLauncher::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); + return InnerReductionLauncher::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 031dbf6f2..1aa196b84 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -57,8 +57,8 @@ public: typedef typename Eigen::internal::traits::Index Index; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp( - const XprType& expr, const Index& axis, const Op& op = Op()) - : m_expr(expr), m_axis(axis), m_accumulator(op) {} + const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op()) + : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index axis() const { return m_axis; } @@ -66,11 +66,14 @@ public: const XprType& expression() const { return m_expr; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op accumulator() const { return m_accumulator; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + bool exclusive() const { return m_exclusive; } protected: typename XprType::Nested m_expr; const Index m_axis; const Op m_accumulator; + const bool m_exclusive; }; // Eval as rvalue @@ -81,7 +84,7 @@ struct TensorEvaluator, Device> { typedef typename XprType::Index Index; static const int NumDims = internal::array_size::Dimensions>::value; typedef DSizes Dimensions; - typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; @@ -99,6 +102,7 @@ struct TensorEvaluator, Device> { : m_impl(op.expression(), device), m_device(device), m_axis(op.axis()), + m_exclusive(op.exclusive()), m_accumulator(op.accumulator()), m_dimensions(m_impl.dimensions()), m_size(m_dimensions[m_axis]), @@ -106,7 +110,7 @@ struct TensorEvaluator, Device> { m_output(NULL) { // Accumulating a scalar isn't supported. 
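// Worked illustration of the new `exclusive` flag (numbers are an example,
// not taken from the patch): scanning [1, 2, 3, 4] along axis 0 with a sum
// accumulator yields
//   inclusive (exclusive == false): [1, 3, 6, 10]  // element i includes input i
//   exclusive (exclusive == true):  [0, 1, 3, 6]   // element i excludes input i
// exposed through the tensor API as, e.g.,
//   Eigen::Tensor<float, 1> inc = t.cumsum(0);        // inclusive scan
//   Eigen::Tensor<float, 1> exc = t.cumsum(0, true);  // exclusive scan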
-      EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+      EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
       eigen_assert(m_axis >= 0 && m_axis < NumDims);
 
       // Compute stride of scan axis
@@ -122,7 +126,7 @@ struct TensorEvaluator<TensorScanOp<Op, ArgType>, Device> {
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
-    return m_dimensions;
+    return m_dimensions;
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
@@ -136,7 +140,7 @@ struct TensorEvaluator<TensorScanOp<Op, ArgType>, Device> {
       return true;
     }
   }
-  
+
   template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
     return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
@@ -152,6 +156,10 @@ struct TensorEvaluator<TensorScanOp<Op, ArgType>, Device> {
     return m_output[index];
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
+    return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     if (m_output != NULL) {
       m_device.deallocate(m_output);
@@ -164,6 +172,7 @@ protected:
   TensorEvaluator<ArgType, Device> m_impl;
   const Device& m_device;
   const Index m_axis;
+  const bool m_exclusive;
   Op m_accumulator;
   const Dimensions& m_dimensions;
   const Index& m_size;
@@ -172,7 +181,7 @@
   // TODO(ibab) Parallelize this single-threaded implementation if desired
   EIGEN_DEVICE_FUNC void accumulateTo(Scalar* data) {
-    // We fix the index along the scan axis to 0 and perform an
+    // We fix the index along the scan axis to 0 and perform a
     // scan per remaining entry. The iteration is split into two nested
     // loops to avoid an integer division by keeping track of each idx1 and idx2.
     for (Index idx1 = 0; idx1 < dimensions().TotalSize() / m_size; idx1 += m_stride) {
@@ -180,12 +189,17 @@
         // Calculate the starting offset for the scan
         Index offset = idx1 * m_size + idx2;
 
-        // Compute the prefix sum along the axis, starting at the calculated offset
+        // Compute the scan along the axis, starting at the calculated offset
         CoeffReturnType accum = m_accumulator.initialize();
         for (Index idx3 = 0; idx3 < m_size; idx3++) {
           Index curr = offset + idx3 * m_stride;
-          m_accumulator.reduce(m_impl.coeff(curr), &accum);
-          data[curr] = m_accumulator.finalize(accum);
+          if (m_exclusive) {
+            data[curr] = m_accumulator.finalize(accum);
+            m_accumulator.reduce(m_impl.coeff(curr), &accum);
+          } else {
+            m_accumulator.reduce(m_impl.coeff(curr), &accum);
+            data[curr] = m_accumulator.finalize(accum);
+          }
         }
       }
     }
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index 8bc986c84..1369ca183 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -113,10 +113,10 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface {
   typedef typename Environment::EnvThread Thread;
 
   struct PerThread {
-    bool inited;
-    NonBlockingThreadPoolTempl* pool;  // Parent pool, or null for normal threads.
-    int thread_id;                     // Worker thread index in pool.
-    unsigned rand;                     // Random generator state.
+    constexpr PerThread() : pool(NULL), thread_id(-1), rand(0) { }
+    NonBlockingThreadPoolTempl* pool;  // Parent pool, or null for normal threads.
+    int thread_id;                     // Worker thread index in pool.
+    uint64_t rand;                     // Random generator state.
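// The widened 64-bit `rand` state above feeds the PCG-XSH-RS step in Rand()
// below. A self-contained sketch of that generator, with the same constants
// as the patch, packaged as a free function purely for illustration:
//
//   static unsigned pcg_xsh_rs(uint64_t* state) {
//     const uint64_t current = *state;
//     // LCG state update
//     *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
//     // Output function: xorshift of the high bits, then a random shift
//     return static_cast<unsigned>(
//         (current ^ (current >> 22)) >> (22 + (current >> 61)));
//   }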
}; Environment env_; @@ -133,6 +133,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { void WorkerLoop(int thread_id) { PerThread* pt = GetPerThread(); pt->pool = this; + pt->rand = std::hash()(std::this_thread::get_id()); pt->thread_id = thread_id; Queue* q = queues_[thread_id]; EventCount::Waiter* waiter = &waiters_[thread_id]; @@ -249,17 +250,18 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { return -1; } - PerThread* GetPerThread() { + static EIGEN_STRONG_INLINE PerThread* GetPerThread() { EIGEN_THREAD_LOCAL PerThread per_thread_; PerThread* pt = &per_thread_; - if (pt->inited) return pt; - pt->inited = true; - pt->rand = static_cast(std::hash()(std::this_thread::get_id())); return pt; } - static unsigned Rand(unsigned* state) { - return *state = *state * 1103515245 + 12345; + static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) { + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return static_cast((current ^ (current >> 22)) >> (22 + (current >> 61))); } }; diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 089042751..feaeeaf5a 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -30,6 +30,13 @@ template struct auto_diff_special_op; } // end namespace internal +template class AutoDiffScalar; + +template +inline AutoDiffScalar MakeAutoDiffScalar(const typename NewDerType::Scalar& value, const NewDerType &der) { + return AutoDiffScalar(value,der); +} + /** \class AutoDiffScalar * \brief A scalar type replacement with automatic differentation capability * @@ -257,20 +264,16 @@ class AutoDiffScalar -m_derivatives); } - inline const AutoDiffScalar, const DerType> > + inline const AutoDiffScalar operator*(const Scalar& other) const { - return AutoDiffScalar, const DerType> >( - m_value * other, - (m_derivatives * other)); + return MakeAutoDiffScalar(m_value * other, m_derivatives * other); } - friend inline const AutoDiffScalar, const DerType> > + friend inline const AutoDiffScalar operator*(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, const DerType> >( - a.value() * other, - a.derivatives() * other); + return MakeAutoDiffScalar(a.value() * other, a.derivatives() * other); } // inline const AutoDiffScalar, DerType>::Type > @@ -289,20 +292,16 @@ class AutoDiffScalar // a.derivatives() * other); // } - inline const AutoDiffScalar, const DerType> > + inline const AutoDiffScalar operator/(const Scalar& other) const { - return AutoDiffScalar, const DerType> >( - m_value / other, - (m_derivatives * (Scalar(1)/other))); + return MakeAutoDiffScalar(m_value / other, (m_derivatives * (Scalar(1)/other))); } - friend inline const AutoDiffScalar, const DerType> > + friend inline const AutoDiffScalar operator/(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, const DerType> >( - other / a.value(), - a.derivatives() * (Scalar(-other) / (a.value()*a.value()))); + return MakeAutoDiffScalar(other / a.value(), a.derivatives() * (Scalar(-other) / (a.value()*a.value()))); } // inline const AutoDiffScalar, DerType>::Type > @@ -322,34 +321,29 @@ class AutoDiffScalar // } template - inline const AutoDiffScalar, - const CwiseBinaryOp, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > > 
> + inline const AutoDiffScalar EIGEN_COMMA + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) EIGEN_COMMA + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,Scalar,product) >,Scalar,product) > operator/(const AutoDiffScalar& other) const { internal::make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - const CwiseBinaryOp, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > > >( + return MakeAutoDiffScalar( m_value / other.value(), - ((m_derivatives * other.value()) - (m_value * other.derivatives())) + ((m_derivatives * other.value()) - (other.derivatives() * m_value)) * (Scalar(1)/(other.value()*other.value()))); } template inline const AutoDiffScalar, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type> > > + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product), + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,Scalar,product) > > operator*(const AutoDiffScalar& other) const { internal::make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > >( + return MakeAutoDiffScalar( m_value * other.value(), - (m_derivatives * other.value()) + (m_value * other.derivatives())); + (m_derivatives * other.value()) + (other.derivatives() * m_value)); } inline AutoDiffScalar& operator*=(const Scalar& other) @@ -426,18 +420,18 @@ struct auto_diff_special_op<_DerType, true> } - inline const AutoDiffScalar, DerType>::Type > + inline const AutoDiffScalar >, DerType>::Type > operator*(const Real& other) const { - return AutoDiffScalar, DerType>::Type >( + return AutoDiffScalar >, DerType>::Type >( derived().value() * other, derived().derivatives() * other); } - friend inline const AutoDiffScalar, DerType>::Type > + friend inline const AutoDiffScalar >, DerType>::Type > operator*(const Real& other, const AutoDiffScalar<_DerType>& a) { - return AutoDiffScalar, DerType>::Type >( + return AutoDiffScalar >, DerType>::Type >( a.value() * other, a.derivatives() * other); } @@ -501,43 +495,43 @@ struct make_coherent_impl -struct scalar_product_traits,A_Scalar> -{ - enum { Defined = 1 }; - typedef Matrix ReturnType; -}; - -template -struct scalar_product_traits > -{ - enum { Defined = 1 }; - typedef Matrix ReturnType; -}; - -template -struct scalar_product_traits,typename DerType::Scalar> -{ - enum { Defined = 1 }; - typedef AutoDiffScalar ReturnType; -}; - -template -struct scalar_product_traits > -{ - enum { Defined = 1 }; - typedef AutoDiffScalar ReturnType; -}; - } // end namespace internal +template +struct ScalarBinaryOpTraits,A_Scalar> +{ + enum { Defined = 1 }; + typedef Matrix ReturnType; +}; + +template +struct ScalarBinaryOpTraits > +{ + enum { Defined = 1 }; + typedef Matrix ReturnType; +}; + +template +struct ScalarBinaryOpTraits,typename DerType::Scalar> +{ + enum { Defined = 1 }; + typedef AutoDiffScalar ReturnType; +}; + +template +struct ScalarBinaryOpTraits > +{ + enum { Defined = 1 }; + typedef AutoDiffScalar ReturnType; +}; + #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ template \ - inline const Eigen::AutoDiffScalar::type>::Scalar>, const typename Eigen::internal::remove_all::type> > \ + inline const Eigen::AutoDiffScalar< \ + EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename Eigen::internal::remove_all::type, typename 
Eigen::internal::traits::type>::Scalar, product) > \ FUNC(const Eigen::AutoDiffScalar& x) { \ using namespace Eigen; \ typedef typename Eigen::internal::traits::type>::Scalar Scalar; \ - typedef AutoDiffScalar, const typename Eigen::internal::remove_all::type> > ReturnType; \ CODE; \ } @@ -570,46 +564,45 @@ inline AutoDiffScalar::type::Plain EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs, using std::abs; - return ReturnType(abs(x.value()), x.derivatives() * (x.value()<0 ? -1 : 1) );) + return Eigen::MakeAutoDiffScalar(abs(x.value()), x.derivatives() * (x.value()<0 ? -1 : 1) );) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs2, using numext::abs2; - return ReturnType(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));) + return Eigen::MakeAutoDiffScalar(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sqrt, using std::sqrt; Scalar sqrtx = sqrt(x.value()); - return ReturnType(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) + return Eigen::MakeAutoDiffScalar(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cos, using std::cos; using std::sin; - return ReturnType(cos(x.value()), x.derivatives() * (-sin(x.value())));) + return Eigen::MakeAutoDiffScalar(cos(x.value()), x.derivatives() * (-sin(x.value())));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sin, using std::sin; using std::cos; - return ReturnType(sin(x.value()),x.derivatives() * cos(x.value()));) + return Eigen::MakeAutoDiffScalar(sin(x.value()),x.derivatives() * cos(x.value()));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(exp, using std::exp; Scalar expx = exp(x.value()); - return ReturnType(expx,x.derivatives() * expx);) + return Eigen::MakeAutoDiffScalar(expx,x.derivatives() * expx);) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(log, using std::log; - return ReturnType(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) + return Eigen::MakeAutoDiffScalar(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) template -inline const Eigen::AutoDiffScalar::type>::Scalar>, const typename internal::remove_all::type> > -pow(const Eigen::AutoDiffScalar& x, const typename internal::traits::type>::Scalar &y) +inline const Eigen::AutoDiffScalar< +EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,typename internal::traits::type>::Scalar,product) > +pow(const Eigen::AutoDiffScalar &x, const typename internal::traits::type>::Scalar &y) { using namespace Eigen; typedef typename internal::remove_all::type DerTypeCleaned; typedef typename Eigen::internal::traits::Scalar Scalar; - return AutoDiffScalar, const DerTypeCleaned> >( - std::pow(x.value(),y), - x.derivatives() * (y * std::pow(x.value(),y-1))); + return Eigen::MakeAutoDiffScalar(std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); } @@ -634,17 +627,17 @@ atan2(const AutoDiffScalar& a, const AutoDiffScalar& b) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(tan, using std::tan; using std::cos; - return ReturnType(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));) + return Eigen::MakeAutoDiffScalar(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(asin, using std::sqrt; using std::asin; - return ReturnType(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));) + return Eigen::MakeAutoDiffScalar(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(acos, using std::sqrt; using std::acos; - return 
ReturnType(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));) + return Eigen::MakeAutoDiffScalar(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));) #undef EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h index bf9727c21..582fa8512 100644 --- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h @@ -203,7 +203,7 @@ struct traits > { typedef typename remove_all<_Lhs>::type Lhs; typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; typedef typename promote_index_type::type StorageIndex; enum { @@ -222,7 +222,7 @@ struct traits > typedef MatrixXpr XprKind; typedef typename remove_all<_Lhs>::type Lhs; typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; typedef typename cwise_promote_storage_type::StorageKind, typename traits::StorageKind, scalar_product_op >::ret StorageKind; typedef typename promote_index_type::type StorageIndex; diff --git a/unsupported/doc/examples/BVH_Example.cpp b/unsupported/doc/examples/BVH_Example.cpp index 6b6fac075..afb0c94c2 100644 --- a/unsupported/doc/examples/BVH_Example.cpp +++ b/unsupported/doc/examples/BVH_Example.cpp @@ -6,9 +6,7 @@ using namespace Eigen; typedef AlignedBox Box2d; namespace Eigen { - namespace internal { - Box2d bounding_box(const Vector2d &v) { return Box2d(v, v); } //compute the bounding box of a single point - } + Box2d bounding_box(const Vector2d &v) { return Box2d(v, v); } //compute the bounding box of a single point } struct PointPointMinimizer //how to compute squared distances between points and rectangles diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 2d65eb0cd..ff0ca75c3 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -112,6 +112,10 @@ ei_add_test(kronecker_product) # TODO: The following test names are prefixed with the cxx11 string, since historically # the tests depended on c++11. This isn't the case anymore so we ought to rename them. +# FIXME: Old versions of MSVC fail to compile this code, so we just disable these tests +# when using visual studio. We should make the check more strict to enable the tests for +# newer versions of MSVC. 
+if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") ei_add_test(cxx11_float16) ei_add_test(cxx11_tensor_dimension) ei_add_test(cxx11_tensor_map) @@ -130,7 +134,8 @@ ei_add_test(cxx11_tensor_io) if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") # This test requires __uint128_t which is only available on 64bit systems ei_add_test(cxx11_tensor_uint128) -endif() +endif() +endif() if(EIGEN_TEST_CXX11) # It should be safe to always run these tests as there is some fallback code for diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 4026f48f0..284b46803 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -1019,6 +1019,153 @@ void test_cuda_erfc(const Scalar stddev) cudaFree(d_out); } +template +void test_cuda_betainc() +{ + Tensor in_x(125); + Tensor in_a(125); + Tensor in_b(125); + Tensor out(125); + Tensor expected_out(125); + out.setZero(); + + Scalar nan = std::numeric_limits::quiet_NaN(); + + Array x(125); + Array a(125); + Array b(125); + Array v(125); + + a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999; + + b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 
999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999; + + x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, + -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1; + + v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan, + 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan, + 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan, + 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, nan, + nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256, + 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001, + 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403, + 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999, + 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan, + 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, nan, + nan, 7.864342668429763e-23, 3.015969667594166e-10, 0.0008598571564165444, + nan, nan, 6.031987710123844e-08, 0.5000000000000007, 0.9999999396801229, + nan, nan, 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, + nan, nan, nan, nan, nan, nan, 0.0, 7.029920380986636e-306, + 2.2450728208591345e-101, nan, nan, 0.0, 9.275871147869727e-302, + 1.2232913026152827e-97, nan, nan, 0.0, 3.0891393081932924e-252, + 2.9303043666183996e-60, nan, nan, 2.248913486879199e-196, + 0.5000000000004947, 0.9999999999999999, nan; + + for (int i = 0; i < 125; ++i) { + in_x(i) = x(i); + in_a(i) = a(i); + in_b(i) = b(i); + expected_out(i) = v(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_in_a; + Scalar* d_in_b; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_a), bytes); + cudaMalloc((void**)(&d_in_b), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_a, in_a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_b, in_b.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in_x(d_in_x, 125); + Eigen::TensorMap > gpu_in_a(d_in_a, 125); + Eigen::TensorMap > gpu_in_b(d_in_b, 125); + Eigen::TensorMap > 
gpu_out(d_out, 125); + + gpu_out.device(gpu_device) = betainc(gpu_in_a, gpu_in_b, gpu_in_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 1; i < 125; ++i) { + if ((std::isnan)(expected_out(i))) { + VERIFY((std::isnan)(out(i))); + } else { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + } + + cudaFree(d_in_x); + cudaFree(d_in_a); + cudaFree(d_in_b); + cudaFree(d_out); +} + + void test_cxx11_tensor_cuda() { CALL_SUBTEST_1(test_cuda_elementwise_small()); @@ -1086,5 +1233,8 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_5(test_cuda_igamma()); CALL_SUBTEST_5(test_cuda_igammac()); + + CALL_SUBTEST_6(test_cuda_betainc()); + CALL_SUBTEST_6(test_cuda_betainc()); #endif } diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp index 8bbcf7089..489960529 100644 --- a/unsupported/test/cxx11_tensor_io.cpp +++ b/unsupported/test/cxx11_tensor_io.cpp @@ -13,6 +13,20 @@ #include +template +static void test_output_0d() +{ + Tensor tensor; + tensor() = 123; + + std::stringstream os; + os << tensor; + + std::string expected("123"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + template static void test_output_1d() { @@ -26,6 +40,12 @@ static void test_output_1d() std::string expected("0\n1\n2\n3\n4"); VERIFY_IS_EQUAL(std::string(os.str()), expected); + + Eigen::Tensor empty_tensor(0); + std::stringstream empty_os; + empty_os << empty_tensor; + std::string empty_string; + VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string); } @@ -101,6 +121,8 @@ static void test_output_const() void test_cxx11_tensor_io() { + CALL_SUBTEST(test_output_0d()); + CALL_SUBTEST(test_output_0d()); CALL_SUBTEST(test_output_1d()); CALL_SUBTEST(test_output_1d()); CALL_SUBTEST(test_output_2d()); diff --git a/unsupported/test/cxx11_tensor_scan.cpp b/unsupported/test/cxx11_tensor_scan.cpp index dbd3023d7..bafa6c96e 100644 --- a/unsupported/test/cxx11_tensor_scan.cpp +++ b/unsupported/test/cxx11_tensor_scan.cpp @@ -38,6 +38,30 @@ static void test_1d_scan() } } +template +static void test_1d_inclusive_scan() +{ + int size = 50; + Tensor tensor(size); + tensor.setRandom(); + Tensor result = tensor.cumsum(0, true); + + VERIFY_IS_EQUAL(tensor.dimension(0), result.dimension(0)); + + float accum = 0; + for (int i = 0; i < size; i++) { + VERIFY_IS_EQUAL(result(i), accum); + accum += tensor(i); + } + + accum = 1; + result = tensor.cumprod(0, true); + for (int i = 0; i < size; i++) { + VERIFY_IS_EQUAL(result(i), accum); + accum *= tensor(i); + } +} + template static void test_4d_scan() {