diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 795d19dce..fcee7b2e3 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -642,7 +642,6 @@ MatrixType LDLT::reconstructedMatrix() const return res; } -#ifndef __CUDACC__ /** \cholesky_module * \returns the Cholesky decomposition with full pivoting without square root of \c *this * \sa MatrixBase::ldlt() @@ -664,7 +663,6 @@ MatrixBase::ldlt() const { return LDLT(derived()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index bd966656d..ddf4875ab 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -507,7 +507,6 @@ MatrixType LLT::reconstructedMatrix() const return matrixL() * matrixL().adjoint().toDenseMatrix(); } -#ifndef __CUDACC__ /** \cholesky_module * \returns the LLT decomposition of \c *this * \sa SelfAdjointView::llt() @@ -529,7 +528,6 @@ SelfAdjointView::llt() const { return LLT(m_matrix); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 5766b6f9d..7a5540593 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -343,29 +343,29 @@ template::value> struct nullary_wrapper { - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index i, Index j) const { return op(i,j); } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index i) const { return op(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index i, Index j) const { return op.template packetOp(i,j); } - template EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index i) const { return op.template packetOp(i); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } }; template struct nullary_wrapper { - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index=0, Index=0) const { return op(); } - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index=0, Index=0) const { return op.template packetOp(); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp(); } }; template struct nullary_wrapper { - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index i, Index j=0) const { return op(i,j); } - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index i, Index j=0) const { return op.template packetOp(i,j); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp(i,j); } }; // We need the following specialization for vector-only functors assigned to a runtime vector, @@ -373,25 +373,84 @@ struct nullary_wrapper // In this case, i==0 and j is used for the actual iteration. 
template struct nullary_wrapper - : nullary_wrapper // to get the identity wrapper { - typedef nullary_wrapper base; - using base::operator(); - using base::packetOp; - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index i, Index j) const { + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { eigen_assert(i==0 || j==0); return op(i+j); } - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index i, Index j) const { + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { eigen_assert(i==0 || j==0); return op.template packetOp(i+j); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } }; template struct nullary_wrapper {}; +#if 0 && EIGEN_COMP_MSVC>0 +// Disable this ugly workaround. This is now handled in traits::match, +// but this piece of code might still become handy if some other weird compilation +// errors pop up again. + +// MSVC exhibits a weird compilation error when +// compiling: +// Eigen::MatrixXf A = MatrixXf::Random(3,3); +// Ref R = 2.f*A; +// and that has_*ary_operator> have not been instantiated yet. +// The "problem" is that evaluator<2.f*A> is instantiated by traits::match<2.f*A> +// and at that time has_*ary_operator returns true regardless of T. +// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>. +// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(), +// and packet() are really instantiated as implemented below: + +// This is a simple wrapper around Index to enforce the re-instantiation of +// has_*ary_operator when needed. 
+template struct nullary_wrapper_workaround_msvc { + nullary_wrapper_workaround_msvc(const T&); + operator T()const; +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i); + } +}; +#endif // MSVC workaround + template struct evaluator > : evaluator_base > @@ -418,30 +477,30 @@ struct evaluator > typedef typename XprType::CoeffReturnType CoeffReturnType; - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - CoeffReturnType coeff(Index row, Index col) const + CoeffReturnType coeff(IndexType row, IndexType col) const { return m_wrapper(m_functor, row, col); } - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - CoeffReturnType coeff(Index index) const + CoeffReturnType coeff(IndexType index) const { return m_wrapper(m_functor,index); } - template + template EIGEN_STRONG_INLINE - PacketType packet(Index row, Index col) const + PacketType packet(IndexType row, IndexType col) const { return m_wrapper.template packetOp(m_functor, row, col); } - template + template EIGEN_STRONG_INLINE - PacketType packet(Index index) const + 
PacketType packet(IndexType index) const { return m_wrapper.template packetOp(m_functor, index); } diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index ad6b1923d..e3f20894d 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -38,6 +38,14 @@ struct traits > : traits + \c operator()() if the procedural generation does not depend on the coefficient entries (e.g., random numbers) + \c operator()(Index i)if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) + \c operator()(Index i,Index j)if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1) + + * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors. + * * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding * C++11 random number generators. * diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 42cffbd3b..dd61195bc 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -97,23 +97,6 @@ template struct GenericNumTraits MulCost = 1 }; - // Division is messy but important, because it is expensive and throughput - // varies significantly. The following numbers are based on min division - // throughput on Haswell. - template - struct Div { - enum { -#ifdef EIGEN_VECTORIZE_AVX - AVX = true, -#else - AVX = false, -#endif - Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)): - Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8 - }; - }; - - typedef T Real; typedef typename internal::conditional< IsInteger, @@ -255,6 +238,9 @@ private: static inline std::string quiet_NaN(); }; +// Empty specialization for void to allow template specialization based on NumTraits::Real with T==void and SFINAE. 
+template<> struct NumTraits {}; + } // end namespace Eigen #endif // EIGEN_NUMTRAITS_H diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 17065fdd5..bdf24f52a 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -35,7 +35,13 @@ struct traits > || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), OuterStrideMatch = Derived::IsVectorAtCompileTime || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), - AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (int(evaluator::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment + // NOTE, this indirection of evaluator::Alignment is needed + // to workaround a very strange bug in MSVC related to the instantiation + // of has_*ary_operator in evaluator. + // This line is surprisingly very sensitive. For instance, simply adding parenthesis + // as "DerivedAlignment = (int(evaluator::Alignment))," will make MSVC fail... 
+ DerivedAlignment = int(evaluator::Alignment), + AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment ScalarTypeMatch = internal::is_same::value, MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 7014a6889..dae0ca5d0 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -94,6 +94,9 @@ template<> struct packet_traits : default_packet_traits }; }; +template<> struct scalar_div_cost { enum { value = 14 }; }; +template<> struct scalar_div_cost { enum { value = 16 }; }; + /* Proper support for integers is only provided by AVX2. In the meantime, we'll use SSE instructions and packets to deal with integers. template<> struct packet_traits : default_packet_traits diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 0057e2062..baad692e3 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -162,6 +162,11 @@ template<> struct unpacket_traits { typedef float type; enum {size=4, template<> struct unpacket_traits { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +#ifndef EIGEN_VECTORIZE_AVX +template<> struct scalar_div_cost { enum { value = 7 }; }; +template<> struct scalar_div_cost { enum { value = 8 }; }; +#endif + #if EIGEN_COMP_MSVC==1500 // Workaround MSVC 9 internal compiler error. 
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index dc3690444..d82ffed02 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -287,7 +287,7 @@ struct functor_traits > { { Cost = 3 * NumTraits::AddCost + 2 * NumTraits::MulCost + - 2 * NumTraits::template Div::Cost, + 2 * scalar_div_cost::value, PacketAccess = false }; }; @@ -375,7 +375,7 @@ struct functor_traits > { typedef typename scalar_quotient_op::result_type result_type; enum { PacketAccess = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv, - Cost = NumTraits::template Div::Cost + Cost = scalar_div_cost::value }; }; diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 692242f7d..a2154d3b5 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -30,8 +30,8 @@ struct functor_traits > template struct scalar_identity_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? 
Scalar(1) : Scalar(0); } }; template struct functor_traits > @@ -55,15 +55,15 @@ struct linspaced_op_impl m_packetStep(pset1(unpacket_traits::size*m_step)), m_base(padd(pset1(low), pmul(pset1(m_step),plset(-unpacket_traits::size)))) {} - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { m_base = padd(m_base, pset1(m_step)); return m_low+Scalar(i)*m_step; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); } const Scalar m_low; const Scalar m_step; @@ -81,11 +81,11 @@ struct linspaced_op_impl m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(Scalar(i)),m_interPacket))); } const Scalar m_low; @@ -102,15 +102,15 @@ struct linspaced_op_impl m_low(low), m_length(high-low), m_divisor(convert_index(num_steps==1?1:num_steps-1)), m_interPacket(plset(0)) {} - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Scalar operator() (Index i) const { + const Scalar operator() (IndexType i) const { return m_low + (m_length*Scalar(i))/m_divisor; } - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Packet 
packetOp(Index i) const { + const Packet packetOp(IndexType i) const { return internal::padd(pset1(m_low), pdiv(pmul(pset1(m_length), padd(pset1(Scalar(i)),m_interPacket)), pset1(m_divisor))); } @@ -142,11 +142,11 @@ template struct linspa : impl((num_steps==1 ? high : low),high,num_steps) {} - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); } // This proxy object handles the actual required temporaries, the different // implementations (random vs. sequential access) as well as the diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 59b9edf69..2009f8e57 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -248,7 +248,7 @@ struct functor_traits > { // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other : (14 * NumTraits::AddCost + 6 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost)) + scalar_div_cost::HasDiv>::value)) #else Cost = (sizeof(Scalar) == 4 @@ -257,7 +257,7 @@ struct functor_traits > { // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other : (23 * NumTraits::AddCost + 12 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost)) + scalar_div_cost::HasDiv>::value)) #endif }; }; @@ -514,17 +514,16 @@ struct functor_traits > { // 9 pmadd, 2 pmul, 1 div, 2 other ? (2 * NumTraits::AddCost + 6 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost) + scalar_div_cost::HasDiv>::value) #else ? 
(11 * NumTraits::AddCost + 11 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost) + scalar_div_cost::HasDiv>::value) #endif // This number assumes a naive implementation of tanh : (6 * NumTraits::AddCost + 3 * NumTraits::MulCost + - 2 * NumTraits::template Div< - packet_traits::HasDiv>::Cost + + 2 * scalar_div_cost::HasDiv>::value + functor_traits >::Cost)) }; }; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index bbef83ea8..d4460bb77 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -383,7 +383,7 @@ struct has_ReturnType template const T& return_ref(); -template +template struct has_nullary_operator { template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref().operator()())>0)>::type * = 0); @@ -392,19 +392,19 @@ struct has_nullary_operator enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; }; -template +template struct has_unary_operator { - template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref().operator()(Index(0)))>0)>::type * = 0); + template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref().operator()(IndexType(0)))>0)>::type * = 0); static meta_no testFunctor(...); enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; }; -template +template struct has_binary_operator { - template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref().operator()(Index(0),Index(0)))>0)>::type * = 0); + template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref().operator()(IndexType(0),IndexType(0)))>0)>::type * = 0); static meta_no testFunctor(...); enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 7e43f4fea..fa60008ef 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -664,6 +664,20 @@ bool 
is_same_dense(const T1 &, const T2 &, typename enable_if +struct scalar_div_cost { + enum { value = 8*NumTraits::MulCost }; +}; + + +template +struct scalar_div_cost::type> { enum { value = 24 }; }; +template +struct scalar_div_cost::type> { enum { value = 21 }; }; + + #ifdef EIGEN_DEBUG_ASSIGN std::string demangle_traversal(int t) { @@ -707,7 +721,7 @@ std::string demangle_flags(int f) * This class permits to control the scalar return type of any binary operation performed on two different scalar types through (partial) template specializations. * * For instance, let \c U1, \c U2 and \c U3 be three user defined scalar types for which most operations between instances of \c U1 and \c U2 returns an \c U3. - * You can let Eigen knows that by defining: + * You can let %Eigen know that by defining: \code template struct ScalarBinaryOpTraits { typedef U3 ReturnType; }; @@ -725,6 +739,14 @@ std::string demangle_flags(int f) struct ScalarBinaryOpTraits > { typedef U1 ReturnType; }; \endcode * + * By default, the following generic combinations are supported: + + + + + +
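<table class="manual">
<tr><th>ScalarA</th><th>ScalarB</th><th>BinaryOp</th><th>ReturnType</th><th>Note</th></tr>
<tr><td>\c T </td><td>\c T </td><td>\c * </td><td>\c T </td><td></td></tr>
<tr class="alt"><td>\c NumTraits<T>::Real </td><td>\c T </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
<tr><td>\c T </td><td>\c NumTraits<T>::Real </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
</table>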
+ * * \sa CwiseBinaryOp */ template > @@ -741,6 +763,17 @@ struct ScalarBinaryOpTraits typedef T ReturnType; }; +template +struct ScalarBinaryOpTraits::IsComplex,T>::type>::Real, BinaryOp> +{ + typedef T ReturnType; +}; +template +struct ScalarBinaryOpTraits::IsComplex,T>::type>::Real, T, BinaryOp> +{ + typedef T ReturnType; +}; + // For Matrix * Permutation template struct ScalarBinaryOpTraits @@ -762,18 +795,6 @@ struct ScalarBinaryOpTraits typedef void ReturnType; }; -template -struct ScalarBinaryOpTraits,BinaryOp> -{ - typedef std::complex ReturnType; -}; - -template -struct ScalarBinaryOpTraits, T,BinaryOp> -{ - typedef std::complex ReturnType; -}; - // We require Lhs and Rhs to have "compatible" scalar types. // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 1632d3ac3..fbb5a61bf 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -879,14 +879,12 @@ struct Assignment >, internal::assign_ * * \sa class FullPivLU */ -#ifndef __CUDACC__ template inline const FullPivLU::PlainObject> MatrixBase::fullPivLu() const { return FullPivLU(eval()); } -#endif } // end namespace Eigen diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 87ac6a281..6c35e88d3 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -584,14 +584,12 @@ struct Assignment >, internal::assi * * \sa class PartialPivLU */ -#ifndef __CUDACC__ template inline const PartialPivLU::PlainObject> MatrixBase::partialPivLu() const { return PartialPivLU(eval()); } -#endif /** \lu_module * @@ -601,14 +599,12 @@ MatrixBase::partialPivLu() const * * \sa class PartialPivLU */ -#ifndef __CUDACC__ template inline const PartialPivLU::PlainObject> MatrixBase::lu() const { return 
PartialPivLU(eval()); } -#endif } // end namespace Eigen diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 35e19b85b..9650781d6 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -637,7 +637,6 @@ typename ColPivHouseholderQR::HouseholderSequenceType ColPivHousehol return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate()); } -#ifndef __CUDACC__ /** \return the column-pivoting Householder QR decomposition of \c *this. * * \sa class ColPivHouseholderQR @@ -648,7 +647,6 @@ MatrixBase::colPivHouseholderQr() const { return ColPivHouseholderQR(eval()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h index f299d3c00..41e4ecdfd 100644 --- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -547,7 +547,6 @@ CompleteOrthogonalDecomposition::householderQ() const { return m_cpqr.householderQ(); } -#ifndef __CUDACC__ /** \return the complete orthogonal decomposition of \c *this. * * \sa class CompleteOrthogonalDecomposition @@ -557,7 +556,6 @@ const CompleteOrthogonalDecomposition::PlainObject> MatrixBase::completeOrthogonalDecomposition() const { return CompleteOrthogonalDecomposition(eval()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index a46d3f9f3..e0e15100d 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -660,7 +660,6 @@ inline typename FullPivHouseholderQR::MatrixQReturnType FullPivHouse return MatrixQReturnType(m_qr, m_hCoeffs, m_rows_transpositions); } -#ifndef __CUDACC__ /** \return the full-pivoting Householder QR decomposition of \c *this. 
* * \sa class FullPivHouseholderQR @@ -671,7 +670,6 @@ MatrixBase::fullPivHouseholderQr() const { return FullPivHouseholderQR(eval()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 5e69cfee9..3513d995c 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -393,7 +393,6 @@ void HouseholderQR::computeInPlace() m_isInitialized = true; } -#ifndef __CUDACC__ /** \return the Householder QR decomposition of \c *this. * * \sa class HouseholderQR @@ -404,7 +403,6 @@ MatrixBase::householderQr() const { return HouseholderQR(eval()); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 605c1a2a6..78dfd1d59 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -783,7 +783,6 @@ JacobiSVD::compute(const MatrixType& matrix, unsig return *this; } -#ifndef __CUDACC__ /** \svd_module * * \return the singular value decomposition of \c *this computed by two-sided @@ -797,7 +796,6 @@ MatrixBase::jacobiSvd(unsigned int computationOptions) const { return JacobiSVD(*this, computationOptions); } -#endif // __CUDACC__ } // end namespace Eigen diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu index 3cf37d221..cb2e4167a 100644 --- a/test/cuda_basic.cu +++ b/test/cuda_basic.cu @@ -20,11 +20,14 @@ #include #include +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include "cuda_common.h" -#include +// Check that dense modules can be properly parsed by nvcc +#include // struct Foo{ // EIGEN_DEVICE_FUNC diff --git a/test/integer_types.cpp b/test/integer_types.cpp index 950f8e9be..a21f73a81 100644 --- a/test/integer_types.cpp +++ b/test/integer_types.cpp @@ -158,4 +158,12 @@ void test_integer_types() CALL_SUBTEST_8( integer_type_tests(Matrix(1, 5)) ); } +#ifdef EIGEN_TEST_PART_9 + VERIFY_IS_EQUAL(internal::scalar_div_cost::value, 8); + 
VERIFY_IS_EQUAL(internal::scalar_div_cost::value, 8); + if(sizeof(long)>sizeof(int)) { + VERIFY(internal::scalar_div_cost::value > internal::scalar_div_cost::value); + VERIFY(internal::scalar_div_cost::value > internal::scalar_div_cost::value); + } +#endif } diff --git a/test/nullary.cpp b/test/nullary.cpp index ad306c1e9..9063c6de8 100644 --- a/test/nullary.cpp +++ b/test/nullary.cpp @@ -170,5 +170,30 @@ void test_nullary() VERIFY(( internal::has_unary_operator >::value )); VERIFY(( !internal::has_binary_operator >::value )); VERIFY(( internal::functor_has_linear_access >::ret )); + + // Regression unit test for a weird MSVC bug. + // Search "nullary_wrapper_workaround_msvc" in CoreEvaluators.h for the details. + // See also traits::match. + { + MatrixXf A = MatrixXf::Random(3,3); + Ref R = 2.0*A; + VERIFY_IS_APPROX(R, A+A); + + Ref R1 = MatrixXf::Random(3,3)+A; + + VectorXi V = VectorXi::Random(3); + Ref R2 = VectorXi::LinSpaced(3,1,3)+V; + VERIFY_IS_APPROX(R2, V+Vector3i(1,2,3)); + + VERIFY(( internal::has_nullary_operator >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + } #endif } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 3f623afa4..fc75dbb5c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -25,7 +25,7 @@ struct scalar_mod_op { }; template struct functor_traits > -{ enum { Cost = NumTraits::template Div::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; /** \internal @@ -38,7 +38,7 @@ struct 
scalar_mod2_op { }; template struct functor_traits > -{ enum { Cost = NumTraits::template Div::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; template struct scalar_fmod_op { diff --git a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h index 3b6a69aff..866a8a460 100644 --- a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +++ b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h @@ -628,15 +628,15 @@ ArpackGeneralizedSelfAdjointEigenSolver& m_info = Success; } - delete select; + delete[] select; } - delete v; - delete iparam; - delete ipntr; - delete workd; - delete workl; - delete resid; + delete[] v; + delete[] iparam; + delete[] ipntr; + delete[] workd; + delete[] workl; + delete[] resid; m_isInitialized = true; diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cu b/unsupported/test/cxx11_tensor_argmax_cuda.cu index c3f4bfa53..6fe8982f2 100644 --- a/unsupported/test/cxx11_tensor_argmax_cuda.cu +++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu @@ -12,7 +12,9 @@ #define EIGEN_TEST_FUNC cxx11_tensor_cuda #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu index 559aae269..88c233994 100644 --- a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu index 5282dc3b9..767e9c678 100644 --- a/unsupported/test/cxx11_tensor_contract_cuda.cu +++ 
b/unsupported/test/cxx11_tensor_contract_cuda.cu @@ -14,7 +14,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index f1e91a879..bf216587a 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -12,7 +12,9 @@ #define EIGEN_TEST_FUNC cxx11_tensor_cuda #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu index bbda95906..fde20ddf2 100644 --- a/unsupported/test/cxx11_tensor_device.cu +++ b/unsupported/test/cxx11_tensor_device.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index a6375d34a..cbf401c86 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu index 32f71e808..b3be199e1 100644 --- a/unsupported/test/cxx11_tensor_random_cuda.cu +++ b/unsupported/test/cxx11_tensor_random_cuda.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cu 
b/unsupported/test/cxx11_tensor_reduction_cuda.cu index 9b5e60e99..80e151d43 100644 --- a/unsupported/test/cxx11_tensor_reduction_cuda.cu +++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu @@ -12,7 +12,9 @@ #define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu index c8a99f62b..761d11fd1 100644 --- a/unsupported/test/cxx11_tensor_scan_cuda.cu +++ b/unsupported/test/cxx11_tensor_scan_cuda.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #include +#endif #include "main.h" #include