From 0fc8954282140d00b47ee1d298c4d4bce35aa724 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 10:38:49 +0100 Subject: [PATCH 01/26] Improve readibility of EIGEN_DEBUG_ASSIGN mode. --- Eigen/src/Core/AssignEvaluator.h | 8 ++++---- Eigen/src/Core/util/XprHelper.h | 32 ++++++++++++++++++++++++++++++++ test/vectorization_logic.cpp | 32 +++----------------------------- 3 files changed, 39 insertions(+), 33 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 121a722f2..3667f60f2 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -125,8 +125,8 @@ public: std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; std::cerr.setf(std::ios::hex, std::ios::basefield); - EIGEN_DEBUG_VAR(DstFlags) - EIGEN_DEBUG_VAR(SrcFlags) + std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; std::cerr.unsetf(std::ios::hex); EIGEN_DEBUG_VAR(DstAlignment) EIGEN_DEBUG_VAR(SrcAlignment) @@ -141,11 +141,11 @@ public: EIGEN_DEBUG_VAR(MayInnerVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; EIGEN_DEBUG_VAR(UnrollingLimit) EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) - EIGEN_DEBUG_VAR(Unrolling) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; std::cerr << std::endl; } #endif diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 624d8a83b..cd93b2320 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -670,6 +670,38 @@ template struct is_same_or_void { enum { value = 1 }; }; template struct is_same_or_void { enum { value = 1 }; }; template<> struct is_same_or_void { enum { value = 1 }; }; +#ifdef EIGEN_DEBUG_ASSIGN +std::string demangle_traversal(int t) +{ + if(t==DefaultTraversal) return "DefaultTraversal"; + if(t==LinearTraversal) return "LinearTraversal"; + if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; + if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; + if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; + return "?"; +} +std::string demangle_unrolling(int t) +{ + if(t==NoUnrolling) return "NoUnrolling"; + if(t==InnerUnrolling) return "InnerUnrolling"; + if(t==CompleteUnrolling) return "CompleteUnrolling"; + return "?"; +} +std::string demangle_flags(int f) +{ + std::string res; + if(f&RowMajorBit) res += " | RowMajor"; + if(f&PacketAccessBit) res += " | Packet"; + if(f&LinearAccessBit) res += " | Linear"; + if(f&LvalueBit) res += " | Lvalue"; + if(f&DirectAccessBit) res += " | Direct"; + if(f&NestByRefBit) res += " | NestByRef"; + if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; + + return res; +} +#endif + } // end namespace internal // we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 6ff38ed11..da60a2f3a 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -11,35 +11,9 @@ #include "main.h" #include -std::string demangle_traversal(int t) -{ - if(t==DefaultTraversal) return "DefaultTraversal"; - if(t==LinearTraversal) return "LinearTraversal"; - if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; - if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; - if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; - return "?"; -} -std::string demangle_unrolling(int t) -{ - if(t==NoUnrolling) return "NoUnrolling"; - if(t==InnerUnrolling) return "InnerUnrolling"; - if(t==CompleteUnrolling) return "CompleteUnrolling"; - return "?"; -} -std::string demangle_flags(int f) -{ - std::string res; - if(f&RowMajorBit) res += " | RowMajor"; - if(f&PacketAccessBit) res += " | Packet"; - if(f&LinearAccessBit) res += " | Linear"; - if(f&LvalueBit) res += " | Lvalue"; - if(f&DirectAccessBit) res += " | Direct"; - if(f&NestByRefBit) res += " | NestByRef"; - if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; - - return res; -} +using internal::demangle_flags; +using internal::demangle_traversal; +using internal::demangle_unrolling; template bool test_assign(const Dst&, const Src&, int traversal, int unrolling) From 73f692d16b544d619c2234259f4086cc93c577df Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 11:01:37 +0100 Subject: [PATCH 02/26] Fix ambiguous instantiation --- Eigen/src/SparseCore/SparseProduct.h | 75 +++++++++++++++++++++------- test/sparse_basic.cpp | 2 +- test/sparse_product.cpp | 4 ++ 3 files changed, 62 insertions(+), 19 deletions(-) diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index 26680b7a7..ea2c3a8a3 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -39,6 +39,34 @@ struct generic_product_impl { template static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + evalTo(dst, lhs, rhs, typename evaluator_traits::Shape()); + } + + // dense += sparse * sparse + template + static void addTo(Dest& dst, const ActualLhs& lhs, const Rhs& rhs, int* = typename enable_if::Shape,DenseShape>::value,int*>::type(0) ) + { + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; + LhsNested lhsNested(lhs); + RhsNested rhsNested(rhs); + internal::sparse_sparse_to_dense_product_selector::type, + typename remove_all::type, Dest>::run(lhsNested,rhsNested,dst); + } + + // dense -= sparse * sparse + template + static void subTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, int* = typename enable_if::Shape,DenseShape>::value,int*>::type(0) ) + { + addTo(dst, -lhs, rhs); + } + +protected: + + // sparse = sparse * sparse + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, SparseShape) { typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; @@ -47,6 +75,14 @@ struct generic_product_impl internal::conservative_sparse_sparse_product_selector::type, typename remove_all::type, Dest>::run(lhsNested,rhsNested,dst); } + + // dense = sparse * sparse + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, DenseShape) + { + dst.setZero(); + addTo(dst, lhs, rhs); + } }; // sparse * sparse-triangular @@ -61,33 +97,36 @@ struct generic_product_impl {}; -// Dense = sparse * sparse -template< typename DstXprType, typename Lhs, typename Rhs, int Options/*, typename Scalar*/> -struct Assignment, internal::assign_op, Sparse2Dense/*, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type*/> +// dense = sparse-product (can be sparse*sparse, sparse*perm, etc.) +template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::assign_op, Sparse2Dense> { - typedef Product SrcXprType; + typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { - dst.setZero(); - dst += src; + generic_product_impl::evalTo(dst,src.lhs(),src.rhs()); } }; -// Dense += sparse * sparse -template< typename DstXprType, typename Lhs, typename Rhs, int Options> -struct Assignment, internal::add_assign_op, Sparse2Dense/*, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type*/> +// dense += sparse-product (can be sparse*sparse, sparse*perm, etc.) +template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::add_assign_op, Sparse2Dense> { - typedef Product SrcXprType; + typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { - typedef typename nested_eval::type LhsNested; - typedef typename nested_eval::type RhsNested; - LhsNested lhsNested(src.lhs()); - RhsNested rhsNested(src.rhs()); - internal::sparse_sparse_to_dense_product_selector::type, - typename remove_all::type, DstXprType>::run(lhsNested,rhsNested,dst); + generic_product_impl::addTo(dst,src.lhs(),src.rhs()); + } +}; + +// dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.) +template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::sub_assign_op, Sparse2Dense> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + generic_product_impl::subTo(dst,src.lhs(),src.rhs()); } }; diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index e8ebd7000..d8e42e984 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -438,7 +438,7 @@ template void sparse_basic(const SparseMatrixType& re { Index i = internal::random(0,rows-1); Index j = internal::random(0,rows-1); - Index v = internal::random(); + Scalar v = internal::random(); m1.coeffRef(i,j) = v; refMat1.coeffRef(i,j) = v; VERIFY_IS_APPROX(m1, refMat1); diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp index 8c83f08d7..7ec5270e8 100644 --- a/test/sparse_product.cpp +++ b/test/sparse_product.cpp @@ -79,12 +79,16 @@ template void sparse_product() // dense ?= sparse * sparse VERIFY_IS_APPROX(dm4 =m2*m3, refMat4 =refMat2*refMat3); VERIFY_IS_APPROX(dm4+=m2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=m2*m3, refMat4-=refMat2*refMat3); VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3, refMat4 =refMat2t.transpose()*refMat3); VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3, refMat4+=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3, refMat4-=refMat2t.transpose()*refMat3); VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3t.transpose(), refMat4 =refMat2t.transpose()*refMat3t.transpose()); VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3t.transpose(), refMat4+=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3t.transpose(), refMat4-=refMat2t.transpose()*refMat3t.transpose()); VERIFY_IS_APPROX(dm4 =m2*m3t.transpose(), refMat4 =refMat2*refMat3t.transpose()); VERIFY_IS_APPROX(dm4+=m2*m3t.transpose(), refMat4+=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2*m3t.transpose(), refMat4-=refMat2*refMat3t.transpose()); VERIFY_IS_APPROX(dm4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); // test aliasing From 12f50a46979b85a7beb3bae00e223f4683c08c78 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 11:04:19 +0100 Subject: [PATCH 03/26] Fix assign vectorization logic with respect to fixed outer-stride --- Eigen/src/Core/AssignEvaluator.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 3667f60f2..ba7748395 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -54,6 +54,7 @@ private: InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) : int(Dst::MaxRowsAtCompileTime), + OuterStride = int(outer_stride_at_compile_time::ret), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, PacketSize = unpacket_traits::size }; @@ -65,7 +66,9 @@ private: MightVectorize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && (functor_traits::PacketAccess), - MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + MayInnerVectorize = MightVectorize + && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0 && int(JointAlignment)>=int(RequiredAlignment), MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess From 8c66b6bc6157da3aaa7a8529e1fcd6e257e7693f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 11:06:42 +0100 Subject: [PATCH 04/26] Simplify evaluator::Flags for Map<> --- Eigen/src/Core/CoreEvaluators.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index c0563f534..c898b2abc 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -633,20 +633,9 @@ struct evaluator > HasNoStride = HasNoInnerStride && HasNoOuterStride, IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - // FIXME I don't get the code below, in particular why outer-stride-at-compile-time should have any effect on PacketAccessBit... - // Let's remove the code below for 3.4 if no issue occur -// PacketAlignment = unpacket_traits::alignment, -// KeepsPacketAccess = bool(HasNoInnerStride) -// && ( bool(IsDynamicSize) -// || HasNoOuterStride -// || ( OuterStrideAtCompileTime!=Dynamic -// && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ), - KeepsPacketAccess = bool(HasNoInnerStride), - - Flags0 = evaluator::Flags, - Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags0) : int(Flags0 & ~LinearAccessBit), - Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit), + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit), + Flags = int( evaluator::Flags) & (LinearAccessMask&PacketAccessMask), Alignment = int(MapOptions)&int(AlignedMask) }; From 2475a1de48cb6f602ab7340d1e1afad15754a103 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 15:39:50 +0100 Subject: [PATCH 05/26] bug #1008: stabilize isfinite/isinf/isnan/hasNaN/allFinite functions for fast-math mode. --- Eigen/src/Core/BooleanRedux.h | 8 ++++ Eigen/src/Core/MathFunctions.h | 69 +++++++++++++++++++++++++++++----- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index ba45cf5c3..be4cd9b3a 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -142,7 +142,11 @@ inline Eigen::Index DenseBase::count() const template inline bool DenseBase::hasNaN() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isNaN().any(); +#else return !((derived().array()==derived().array()).all()); +#endif } /** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. @@ -152,7 +156,11 @@ inline bool DenseBase::hasNaN() const template inline bool DenseBase::allFinite() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isFinite().all(); +#else return !((derived()-derived()).hasNaN()); +#endif } } // end namespace Eigen diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 19b7954a9..299a9d61d 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -818,11 +818,18 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } +// std::is* do not work with fast-math and gcc +#if EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && defined __FAST_MATH__) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + template EIGEN_DEVICE_FUNC bool (isfinite)(const T& x) { - #if EIGEN_HAS_CXX11_MATH + #if EIGEN_USE_STD_FPCLASSIFY using std::isfinite; return isfinite EIGEN_NOT_A_MACRO (x); #else @@ -830,11 +837,23 @@ bool (isfinite)(const T& x) #endif } +template +EIGEN_DEVICE_FUNC +bool (isinf)(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); + #else + return x>NumTraits::highest() || x::lowest(); + #endif +} + template EIGEN_DEVICE_FUNC bool (isnan)(const T& x) { - #if EIGEN_HAS_CXX11_MATH + #if EIGEN_USE_STD_FPCLASSIFY using std::isnan; return isnan EIGEN_NOT_A_MACRO (x); #else @@ -842,18 +861,48 @@ bool (isnan)(const T& x) #endif } +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +//MSVC defines a _isnan builtin function, but for double only +template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan(double(x)); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(double(x)); } + +#elif defined __FAST_MATH__ + +#if EIGEN_COMP_CLANG +#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optnone)) +#elif EIGEN_COMP_GNUC +#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-fast-math"))) +#else +#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC +#endif + template -EIGEN_DEVICE_FUNC -bool (isinf)(const T& x) +EIGEN_TMP_NOOPT_ATTRIB +bool isinf_helper(const T& x) { - #if EIGEN_HAS_CXX11_MATH - using std::isinf; - return isinf EIGEN_NOT_A_MACRO (x); - #else - return x>NumTraits::highest() || x::lowest(); - #endif + return x>NumTraits::highest() || x::lowest(); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return !(x <= std::numeric_limits::infinity()); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return x!=x; } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return x!=x; } + +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return isinf_helper(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return isinf_helper(x); } +#if EIGEN_COMP_CLANG +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return isinf(double(x)); } +#endif + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + template bool (isfinite)(const std::complex& x) { From e3031d7bfa5e5f983fd670ee301776e4421a43bf Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 16:43:23 +0100 Subject: [PATCH 06/26] bug #1008: improve handling of fast-math mode for older gcc versions. --- Eigen/src/Core/MathFunctions.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 299a9d61d..6d2730960 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -819,7 +819,7 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) } // std::is* do not work with fast-math and gcc -#if EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && defined __FAST_MATH__) +#if EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__) #define EIGEN_USE_STD_FPCLASSIFY 1 #else #define EIGEN_USE_STD_FPCLASSIFY 0 @@ -870,14 +870,18 @@ template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan( template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(double(x)); } -#elif defined __FAST_MATH__ +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__) #if EIGEN_COMP_CLANG -#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optnone)) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optnone)) #elif EIGEN_COMP_GNUC -#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-fast-math"))) + #if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-finite-math-only"))) + #else + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((noinline,optimize("no-finite-math-only"))) + #endif #else -#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC #endif template @@ -887,14 +891,13 @@ bool isinf_helper(const T& x) return x>NumTraits::highest() || x::lowest(); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return !(x <= std::numeric_limits::infinity()); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return x!=x; } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return x!=x; } - -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return isinf_helper(x); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return isinf_helper(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return __builtin_isinf(x); } #if EIGEN_COMP_CLANG -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return isinf(double(x)); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return __builtin_isinf(double(x)); } #endif #undef EIGEN_TMP_NOOPT_ATTRIB From 946f8850e83c3174d5f0b22f2818c88bf7a33062 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 16:44:45 +0100 Subject: [PATCH 07/26] bug #1008: add a unit test for fast-math mode and isinf/isnan/isfinite/etc. functions. --- test/CMakeLists.txt | 12 ++++++ test/fastmath.cpp | 90 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 test/fastmath.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8a8ba6f4..822ca8f10 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -257,6 +257,18 @@ ei_add_test(dense_storage) ei_add_test(ctorleak) ei_add_test(mpl2only) +check_cxx_compiler_flag("-ffast-math" COMPILER_SUPPORT_FASTMATH) +if(COMPILER_SUPPORT_FASTMATH) + set(EIGEN_FASTMATH_FLAGS "-ffast-math") +else() + check_cxx_compiler_flag("/fp:fast" COMPILER_SUPPORT_FPFAST) + if(COMPILER_SUPPORT_FPFAST) + set(EIGEN_FASTMATH_FLAGS "/fp:fast") + endif() +endif() + +ei_add_test(fastmath " ${EIGEN_FASTMATH_FLAGS} ") + # # ei_add_test(denseLM) if(QT4_FOUND) diff --git a/test/fastmath.cpp b/test/fastmath.cpp new file mode 100644 index 000000000..2911c0544 --- /dev/null +++ b/test/fastmath.cpp @@ -0,0 +1,90 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +void check(bool b, bool ref) +{ + std::cout << b; + if(b==ref) + std::cout << " OK "; + else + std::cout << " BAD "; +} + +template +void check_inf_nan(bool dryrun) { + Matrix m(10); + m.setRandom(); + m(3) = std::numeric_limits::quiet_NaN(); + + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),true); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), true); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( (numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + m(4) /= 0.0; + if(dryrun) + { + std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(4) << ") = "; check((std::isinf)(m(4)),true); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(4)), true); std::cout << "\n"; + std::cout << "std::isnan(" << m(4) << ") = "; check((std::isnan)(m(4)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(4)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(4)) ); + VERIFY( (numext::isinf)(m(4)) ); + VERIFY( !(numext::isnan)(m(4)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + m(3) = 0; + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),true); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), true); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 0); std::cout << "\n"; + std::cout << "\n\n"; + } + else + { + VERIFY( (numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( !(numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( !m.hasNaN() ); + } +} + +void test_fastmath() { + std::cout << "*** float *** \n\n"; check_inf_nan(true); + std::cout << "*** double ***\n\n"; check_inf_nan(true); + std::cout << "*** long double *** \n\n"; check_inf_nan(true); + + check_inf_nan(false); + check_inf_nan(false); + check_inf_nan(false); +} From d4cf436cb1a8c2af96c9351114195847bc3ff1f1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 17:35:54 +0100 Subject: [PATCH 08/26] Enable mpreal unit test for C++11 compiler only --- unsupported/test/CMakeLists.txt | 4 ++-- unsupported/test/mpreal/mpreal.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 3d82508f7..cc4ce1c59 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -47,11 +47,11 @@ ei_add_test(FFT) find_package(MPFR 2.3.0) find_package(GMP) -if(MPFR_FOUND) +if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11) include_directories(${MPFR_INCLUDES} ./mpreal) ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ") set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES}) - ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" ) + ei_add_test(mpreal_support "-std=c++11" "${EIGEN_MPFR_TEST_LIBRARIES}" ) else() ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ") endif() diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index 9b96ec411..c4f6cf0cb 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -107,7 +107,7 @@ #define MPREAL_HAVE_EXPLICIT_CONVERTERS #endif -//#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h +#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h #if defined(MPREAL_HAVE_MSVC_DEBUGVIEW) && defined(_MSC_VER) && defined(_DEBUG) #define MPREAL_MSVC_DEBUGVIEW_CODE DebugView = toString(); From 827d8a9bad6f6c8a8e0211358b51c60db18a2cfb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 21:37:03 +0100 Subject: [PATCH 09/26] Fix false negative in redux test --- test/redux.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/redux.cpp b/test/redux.cpp index 849faf55e..bfd9a8d50 100644 --- a/test/redux.cpp +++ b/test/redux.cpp @@ -24,7 +24,7 @@ template void matrixRedux(const MatrixType& m) MatrixType m1 = MatrixType::Random(rows, cols); // The entries of m1 are uniformly distributed in [0,1], so m1.prod() is very small. This may lead to test - // failures if we underflow into denormals. Thus, we scale so that entires are close to 1. + // failures if we underflow into denormals. Thus, we scale so that entries are close to 1. MatrixType m1_for_prod = MatrixType::Ones(rows, cols) + RealScalar(0.2) * m1; VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); @@ -71,7 +71,9 @@ template void matrixRedux(const MatrixType& m) // test nesting complex expression VERIFY_EVALUATION_COUNT( (m1.matrix()*m1.matrix().transpose()).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); - VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())*Scalar(2)).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); + Matrix m2(rows,rows); + m2.setRandom(); + VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())+m2).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); } From 77ff3386b7d28c68c9e277e60f285ae1b3124b47 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 11:42:14 +0100 Subject: [PATCH 10/26] Refactoring of the cost model: - Dynamic is now an invalid value - introduce a HugeCost constant to be used for runtime-cost values or arbitrarily huge cost - add sanity checks for cost values: must be >=0 and not too large This change provides several benefits: - it fixes shortcoming is some cost computation where the Dynamic case was not properly handled. - it simplifies cost computation logic, and should avoid future similar shortcomings. - it allows to distinguish between different level of dynamic/huge/infinite cost - it should enable further simplifications in the computation of costs (save compilation time) --- Eigen/src/Core/AssignEvaluator.h | 4 +- Eigen/src/Core/BooleanRedux.h | 8 +-- Eigen/src/Core/CoreEvaluators.h | 50 ++++++++++++++----- Eigen/src/Core/NumTraits.h | 6 +-- Eigen/src/Core/ProductEvaluators.h | 12 +++-- Eigen/src/Core/Redux.h | 8 +-- Eigen/src/Core/Visitor.h | 4 +- Eigen/src/Core/util/Constants.h | 8 +++ Eigen/src/Core/util/StaticAssert.h | 7 ++- Eigen/src/Core/util/XprHelper.h | 16 ++---- Eigen/src/SparseCore/SparseCompressedBase.h | 10 +++- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 20 ++++++-- Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 14 ++++-- Eigen/src/SparseCore/SparseDenseProduct.h | 12 +++-- Eigen/src/SparseCore/SparseDiagonalProduct.h | 6 +-- Eigen/src/SparseCore/SparseProduct.h | 2 +- Eigen/src/SparseCore/SparseTriangularView.h | 2 +- Eigen/src/SparseCore/SparseVector.h | 8 +-- test/redux.cpp | 1 - test/vectorwiseop.cpp | 5 ++ .../KroneckerProduct/KroneckerTensorProduct.h | 2 +- .../Eigen/src/Skyline/SkylineProduct.h | 2 +- 22 files changed, 139 insertions(+), 68 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index ba7748395..e66cf074f 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -98,10 +98,10 @@ private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(SrcEvaluator::CoeffReadCost) != Dynamic + && int(SrcEvaluator::CoeffReadCost) < HugeCost && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(SrcEvaluator::CoeffReadCost) != Dynamic + && int(SrcEvaluator::CoeffReadCost) < HugeCost && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) }; diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index be4cd9b3a..bda9f6966 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -83,8 +83,8 @@ inline bool DenseBase::all() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic + && Evaluator::CoeffReadCost < HugeCost + && NumTraits::AddCost < HugeCost && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); @@ -109,8 +109,8 @@ inline bool DenseBase::any() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic + && Evaluator::CoeffReadCost < HugeCost + && NumTraits::AddCost < HugeCost && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index c898b2abc..fb0cdc99c 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -137,11 +137,15 @@ struct evaluator > m_outerStride(IsVectorAtCompileTime ? 0 : int(IsRowMajor) ? ColsAtCompileTime : RowsAtCompileTime) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { @@ -327,7 +331,9 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -376,7 +382,10 @@ struct unary_evaluator, IndexBased > EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -449,7 +458,10 @@ struct binary_evaluator, IndexBased, IndexBase : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -502,7 +514,10 @@ struct unary_evaluator, IndexBased> EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_unaryOp(op.functor()), m_argImpl(op.nestedExpression()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -559,6 +574,7 @@ struct mapbase_evaluator : evaluator_base { EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const @@ -713,7 +729,10 @@ struct evaluator > Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } }; // no direct-access => dispatch to a unary evaluator @@ -831,8 +850,8 @@ struct evaluator > typedef Select XprType; enum { CoeffReadCost = evaluator::CoeffReadCost - + EIGEN_SIZE_MAX(evaluator::CoeffReadCost, - evaluator::CoeffReadCost), + + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, @@ -843,7 +862,9 @@ struct evaluator > : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -965,11 +986,11 @@ struct evaluator > typedef typename ArgType::Scalar InputScalar; typedef typename XprType::Scalar Scalar; enum { - TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(XprType::ColsAtCompileTime) + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) }; typedef typename MemberOp::template Cost CostOpType; enum { - CoeffReadCost = TraversalSize==Dynamic ? Dynamic + CoeffReadCost = TraversalSize==Dynamic ? HugeCost : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits), @@ -979,7 +1000,10 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value)); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 61ec2f533..1d85dec72 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -157,9 +157,9 @@ struct NumTraits > IsInteger = NumTraits::IsInteger, IsSigned = NumTraits::IsSigned, RequireInitialization = 1, - ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, - AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::AddCost, - MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::MulCost + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost }; static inline RealScalar epsilon() { return NumTraits::epsilon(); } diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 04dc08957..e7677b90c 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -422,7 +422,11 @@ struct product_evaluator, ProductTag, DenseShape, m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. m_innerDim(xpr.lhs().cols()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } // Everything below here is taken from CoeffBasedProduct.h @@ -447,11 +451,11 @@ struct product_evaluator, ProductTag, DenseShape, LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost - : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits::AddCost==Dynamic || NumTraits::MulCost==Dynamic) ? Dynamic + : InnerSize == Dynamic ? HugeCost : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, - Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + Unroll = CoeffReadCost < HugeCost && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, @@ -736,6 +740,8 @@ public: diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) : m_diagImpl(diag), m_matImpl(mat) { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 309898b36..fcf0ba76a 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -51,9 +51,9 @@ public: public: enum { Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost == Dynamic - || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost == Dynamic) - ) ? Dynamic + || Derived::CoeffReadCost >= HugeCost + || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost >= HugeCost) + ) ? HugeCost : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) @@ -61,7 +61,7 @@ public: public: enum { - Unrolling = Cost != Dynamic && Cost <= UnrollingLimit + Unrolling = Cost < HugeCost && Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling }; diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index a4e2cebab..f3f15e9e0 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -110,8 +110,8 @@ void DenseBase::visit(Visitor& visitor) const ThisEvaluator thisEval(derived()); enum { unroll = SizeAtCompileTime != Dynamic - && ThisEvaluator::CoeffReadCost != Dynamic - && (SizeAtCompileTime == 1 || internal::functor_traits::Cost != Dynamic) + && ThisEvaluator::CoeffReadCost < HugeCost + && (SizeAtCompileTime == 1 || internal::functor_traits::Cost < HugeCost) && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits::Cost <= EIGEN_UNROLLING_LIMIT }; return internal::visitor_impl::value), \ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES) +// Check that a cost value is positive, and that is stay within a reasonable range +// TODO this check could be enabled for internal debugging only +#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \ + EIGEN_STATIC_ASSERT((C)>=0 && (C)<2*HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); #endif // EIGEN_STATIC_ASSERT_H diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index cd93b2320..209c73e1e 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -397,28 +397,20 @@ struct transfer_constness template::type> struct nested_eval { enum { - // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values. - // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values. - // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues - // (poor choice of temporaries). - // It's important that this value can still be squared without integer overflowing. - DynamicAsInteger = 10000, ScalarReadCost = NumTraits::Scalar>::ReadCost, - ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), CoeffReadCost = evaluator::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a tempory? // Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1. // This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON // for all evaluator creating a temporary. This flag is then propagated by the parent evaluators. // Another solution could be to count the number of temps? - CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), - NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n, - CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, - CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger + NAsInteger = n == Dynamic ? HugeCost : n, + CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost, + CostNoEval = NAsInteger * CoeffReadCost }; typedef typename conditional< ( (int(evaluator::Flags) & EvalBeforeNestingBit) || - (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ), + (int(CostEval) < int(CostNoEval)) ), PlainObject, typename ref_selector::type >::type type; diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index c8a2705f9..fb795a0ed 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -226,8 +226,14 @@ struct evaluator > Flags = Derived::Flags }; - evaluator() : m_matrix(0) {} - explicit evaluator(const Derived &mat) : m_matrix(&mat) {} + evaluator() : m_matrix(0) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + explicit evaluator(const Derived &mat) : m_matrix(&mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_matrix->nonZeros(); diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index b87b6b749..abbbf397b 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -139,7 +139,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate(); @@ -220,7 +223,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate()); @@ -289,7 +295,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_rhsImpl.nonZerosEstimate(); @@ -359,7 +368,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_lhsImpl.nonZerosEstimate(); diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index 469bac36e..fe4a97120 100644 --- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -29,7 +29,11 @@ struct unary_evaluator, IteratorBased> Flags = XprType::Flags }; - explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_argImpl.nonZerosEstimate(); @@ -108,7 +112,11 @@ struct unary_evaluator, IteratorBased> Flags = XprType::Flags }; - explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } protected: typedef typename evaluator::InnerIterator EvalIterator; diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 2e34ae74c..87c946b9b 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -221,7 +221,7 @@ protected: public: enum { Flags = NeedToTranspose ? RowMajorBit : 0, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; class InnerIterator : public LhsIterator @@ -263,12 +263,16 @@ public: sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs) : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } // transpose case sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs) : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } protected: const LhsArg m_lhs; diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h index cf31e5a53..e4af49e09 100644 --- a/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2014 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -39,7 +39,7 @@ struct product_evaluator, ProductTag, Diagonal : public sparse_diagonal_product_evaluator { typedef Product XprType; - enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags + enum { CoeffReadCost = HugeCost, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags typedef sparse_diagonal_product_evaluator Base; explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} @@ -50,7 +50,7 @@ struct product_evaluator, ProductTag, SparseSh : public sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> { typedef Product XprType; - enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags + enum { CoeffReadCost = HugeCost, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags typedef sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base; explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {} diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index ea2c3a8a3..cbd0db71b 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index 57d88893e..7c718e4e1 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2014 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // Copyright (C) 2012 Désiré Nuentsa-Wakam // // This Source Code Form is subject to the terms of the Mozilla diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index 94f8d0341..7ec73a365 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -41,7 +41,6 @@ struct traits > MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 0 : RowMajorBit) | CompressedAccessBit, - CoeffReadCost = NumTraits::ReadCost, SupportedAccessPatterns = InnerRandomAccessPattern }; }; @@ -380,7 +379,10 @@ struct evaluator > Flags = SparseVectorType::Flags }; - explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {} + explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_matrix.nonZeros(); diff --git a/test/redux.cpp b/test/redux.cpp index bfd9a8d50..6ddc59c18 100644 --- a/test/redux.cpp +++ b/test/redux.cpp @@ -74,7 +74,6 @@ template void matrixRedux(const MatrixType& m) Matrix m2(rows,rows); m2.setRandom(); VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())+m2).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); - } template void vectorRedux(const VectorType& w) diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index ddd9f8389..529f4298b 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp @@ -217,6 +217,11 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX( (m1 * m1.transpose()).colwise().sum(), m1m1.colwise().sum()); Matrix tmp(rows); VERIFY_EVALUATION_COUNT( tmp = (m1 * m1.transpose()).colwise().sum(), (MatrixType::RowsAtCompileTime==Dynamic ? 1 : 0)); + + m2 = m1.rowwise() - (m1.colwise().sum()/m1.rows()).eval(); + m1 = m1.rowwise() - (m1.colwise().sum()/m1.rows()); + VERIFY_IS_APPROX( m1, m2 ); + VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/m1.rows()), (MatrixType::RowsAtCompileTime==Dynamic ? 1 : 0) ); } void test_vectorwiseop() diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h index 4406437cc..4d3e5358e 100644 --- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h @@ -240,7 +240,7 @@ struct traits > Flags = ((LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) | EvalBeforeNestingBit | EvalBeforeAssigningBit, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; typedef SparseMatrix ReturnType; diff --git a/unsupported/Eigen/src/Skyline/SkylineProduct.h b/unsupported/Eigen/src/Skyline/SkylineProduct.h index d218a7c25..d9eb814c1 100644 --- a/unsupported/Eigen/src/Skyline/SkylineProduct.h +++ b/unsupported/Eigen/src/Skyline/SkylineProduct.h @@ -49,7 +49,7 @@ struct internal::traits > { | EvalBeforeAssigningBit | EvalBeforeNestingBit, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; typedef typename internal::conditional Date: Wed, 28 Oct 2015 11:59:20 +0100 Subject: [PATCH 11/26] Extend vectorwiseop unit test with column/row vectors as input. --- test/vectorwiseop.cpp | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index 529f4298b..87476f95b 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp @@ -158,16 +158,22 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2, m1.colwise() + colvec); VERIFY_IS_APPROX(m2.col(c), m1.col(c) + colvec); - VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); - VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + } m2 = m1; m2.rowwise() += rowvec; VERIFY_IS_APPROX(m2, m1.rowwise() + rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec); - VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); - VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + } // test substraction @@ -176,16 +182,22 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2, m1.colwise() - colvec); VERIFY_IS_APPROX(m2.col(c), m1.col(c) - colvec); - VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); - VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + } m2 = m1; m2.rowwise() -= rowvec; VERIFY_IS_APPROX(m2, m1.rowwise() - rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) - rowvec); - VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); - VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + } // test norm rrres = m1.colwise().norm(); @@ -221,7 +233,7 @@ template void vectorwiseop_matrix(const MatrixType& m) m2 = m1.rowwise() - (m1.colwise().sum()/m1.rows()).eval(); m1 = m1.rowwise() - (m1.colwise().sum()/m1.rows()); VERIFY_IS_APPROX( m1, m2 ); - VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/m1.rows()), (MatrixType::RowsAtCompileTime==Dynamic ? 1 : 0) ); + VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/m1.rows()), (MatrixType::RowsAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime!=1 ? 1 : 0) ); } void test_vectorwiseop() @@ -232,4 +244,6 @@ void test_vectorwiseop() CALL_SUBTEST_4( vectorwiseop_matrix(Matrix4cf()) ); CALL_SUBTEST_5( vectorwiseop_matrix(Matrix()) ); CALL_SUBTEST_6( vectorwiseop_matrix(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(VectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(RowVectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } From 1f11dd6cedc223f92f9ce99a22080dd267fcb488 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 12:53:13 +0100 Subject: [PATCH 12/26] Add a unit test for large chains of products --- test/product_large.cpp | 11 +++++++++++ test/product_small.cpp | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/test/product_large.cpp b/test/product_large.cpp index 84c489580..7207973c2 100644 --- a/test/product_large.cpp +++ b/test/product_large.cpp @@ -61,6 +61,17 @@ void test_product_large() MatrixXf r2 = mat1.row(2)*mat2; VERIFY_IS_APPROX(r2, (mat1.row(2)*mat2).eval()); } + + { + Eigen::MatrixXd A(10,10), B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } #endif // Regression test for bug 714: diff --git a/test/product_small.cpp b/test/product_small.cpp index 091955a0f..c561ec63b 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -56,5 +56,16 @@ void test_product_small() VERIFY_IS_APPROX(B * A.inverse(), B * A.inverse()[0]); VERIFY_IS_APPROX(A.inverse() * C, A.inverse()[0] * C); } + + { + Eigen::Matrix A, B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } #endif } From 85313048581d22901c7940a46bd41b19e88ff47c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 13:39:02 +0100 Subject: [PATCH 13/26] Simplify cost computations based on HugeCost being smaller that unrolling limit --- Eigen/src/Core/AssignEvaluator.h | 2 -- Eigen/src/Core/BooleanRedux.h | 4 ---- Eigen/src/Core/ProductEvaluators.h | 2 +- Eigen/src/Core/Redux.h | 12 +++--------- Eigen/src/Core/Visitor.h | 13 +++++-------- Eigen/src/Core/util/Constants.h | 2 +- Eigen/src/Core/util/StaticAssert.h | 2 +- 7 files changed, 11 insertions(+), 26 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index e66cf074f..db3bef38d 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -98,10 +98,8 @@ private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(SrcEvaluator::CoeffReadCost) < HugeCost && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(SrcEvaluator::CoeffReadCost) < HugeCost && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) }; diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index bda9f6966..8409d8749 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -83,8 +83,6 @@ inline bool DenseBase::all() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost < HugeCost - && NumTraits::AddCost < HugeCost && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); @@ -109,8 +107,6 @@ inline bool DenseBase::any() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost < HugeCost - && NumTraits::AddCost < HugeCost && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index e7677b90c..2927fcc0e 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -455,7 +455,7 @@ struct product_evaluator, ProductTag, DenseShape, : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, - Unroll = CoeffReadCost < HugeCost && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index fcf0ba76a..d170cae29 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -50,20 +50,14 @@ public: public: enum { - Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost >= HugeCost - || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost >= HugeCost) - ) ? HugeCost - : Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: enum { - Unrolling = Cost < HugeCost && Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling + Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling }; #ifdef EIGEN_DEBUG_ASSIGN diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index f3f15e9e0..7aac0b6e1 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -109,14 +109,11 @@ void DenseBase::visit(Visitor& visitor) const typedef typename internal::visitor_evaluator ThisEvaluator; ThisEvaluator thisEval(derived()); - enum { unroll = SizeAtCompileTime != Dynamic - && ThisEvaluator::CoeffReadCost < HugeCost - && (SizeAtCompileTime == 1 || internal::functor_traits::Cost < HugeCost) - && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits::Cost - <= EIGEN_UNROLLING_LIMIT }; - return internal::visitor_impl::run(thisEval, visitor); + enum { + unroll = SizeAtCompileTime != Dynamic + && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits::Cost <= EIGEN_UNROLLING_LIMIT + }; + return internal::visitor_impl::run(thisEval, visitor); } namespace internal { diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 12238e5dd..c35077af6 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -36,7 +36,7 @@ const int Infinity = -1; * This value has to be positive to (1) simplify cost computation, and (2) allow to distinguish between a very expensive and very very expensive expressions. * It thus must also be large enough to make sure unrolling won't happen and that sub expressions will be evaluated, but not too large to avoid overflow. */ -const int HugeCost = 1000; +const int HugeCost = 10000; /** \defgroup flags Flags * \ingroup Core_Module diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 77da6cc5f..9d7302d81 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -204,6 +204,6 @@ // Check that a cost value is positive, and that is stay within a reasonable range // TODO this check could be enabled for internal debugging only #define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \ - EIGEN_STATIC_ASSERT((C)>=0 && (C)<2*HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); + EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); #endif // EIGEN_STATIC_ASSERT_H From 1a842c0dc441cca1bd66a516b16d7fe6a4c0ba26 Mon Sep 17 00:00:00 2001 From: Ilya Popov Date: Wed, 28 Oct 2015 09:52:55 +0000 Subject: [PATCH 14/26] Fix typo in TutorialSparse: laplace equation contains gradient symbol (\nabla) instead of laplacian (\Delta). --- doc/TutorialSparse.dox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/TutorialSparse.dox b/doc/TutorialSparse.dox index 835c59354..fb07adaa2 100644 --- a/doc/TutorialSparse.dox +++ b/doc/TutorialSparse.dox @@ -83,7 +83,7 @@ There is no notion of compressed/uncompressed mode for a SparseVector. \section TutorialSparseExample First example -Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \nabla u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions. +Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \Delta u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions. Such problem can be mathematically expressed as a linear problem of the form \f$ Ax=b \f$ where \f$ x \f$ is the vector of \c m unknowns (in our case, the values of the pixels), \f$ b \f$ is the right hand side vector resulting from the boundary conditions, and \f$ A \f$ is an \f$ m \times m \f$ matrix containing only a few non-zero elements resulting from the discretization of the Laplacian operator. From 28ddb5158dbe2a633a11f77ad7145ceae08abbf3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 16:27:20 +0100 Subject: [PATCH 15/26] Enable std::isfinite/nan/inf on MSVC 2013 and newer and clang. Fix isinf for gcc4.4 and older msvc with fast-math. --- Eigen/src/Core/MathFunctions.h | 50 +++++++++++++++------------------- test/fastmath.cpp | 8 ++++++ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 6d2730960..515eca137 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -818,8 +818,8 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } -// std::is* do not work with fast-math and gcc -#if EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__) +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. +#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) #define EIGEN_USE_STD_FPCLASSIFY 1 #else #define EIGEN_USE_STD_FPCLASSIFY 0 @@ -865,40 +865,34 @@ bool (isnan)(const T& x) #if EIGEN_COMP_MSVC -//MSVC defines a _isnan builtin function, but for double only -template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan(double(x)); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(double(x)); } - -#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__) - -#if EIGEN_COMP_CLANG - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optnone)) -#elif EIGEN_COMP_GNUC - #if EIGEN_GNUC_AT_LEAST(5,0) - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-finite-math-only"))) - #else - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((noinline,optimize("no-finite-math-only"))) - #endif -#else - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC -#endif - -template -EIGEN_TMP_NOOPT_ATTRIB -bool isinf_helper(const T& x) +template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) { - return x>NumTraits::highest() || x::lowest(); + return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; } +//MSVC defines a _isnan builtin function, but for double only +template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(x); } + +template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-finite-math-only"))) +#else + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return __builtin_isnan(x); } template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return __builtin_isnan(x); } template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return __builtin_isnan(x); } template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return __builtin_isinf(x); } template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return __builtin_isinf(x); } -#if EIGEN_COMP_CLANG -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return __builtin_isinf(double(x)); } -#endif +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return __builtin_isinf(x); } #undef EIGEN_TMP_NOOPT_ATTRIB diff --git a/test/fastmath.cpp b/test/fastmath.cpp index 2911c0544..16462d54f 100644 --- a/test/fastmath.cpp +++ b/test/fastmath.cpp @@ -18,6 +18,14 @@ void check(bool b, bool ref) std::cout << " BAD "; } +#if EIGEN_COMP_MSVC < 1800 +namespace std { + template bool (isfinite)(T x) { return _finite(x); } + template bool (isnan)(T x) { return _isnan(x); } + template bool (isinf)(T x) { return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; } +} +#endif + template void check_inf_nan(bool dryrun) { Matrix m(10); From 6759a21e49614dd7cd977e3c1fd782458f33fc9f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 16:49:15 +0100 Subject: [PATCH 16/26] CUDA support: define more accurate min/max values for device::numeric_limits of float and double using values from cfloat header --- Eigen/src/Core/util/Meta.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 6eb409194..ef35cefb4 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -11,6 +11,10 @@ #ifndef EIGEN_META_H #define EIGEN_META_H +#if defined(__CUDA_ARCH__) +#include +#endif + namespace Eigen { namespace internal { @@ -138,16 +142,16 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static float (max)() { return CUDART_MAX_NORMAL_F; } EIGEN_DEVICE_FUNC - static float (min)() { return __FLT_EPSILON__; } + static float (min)() { return FLT_MIN; } }; template<> struct numeric_limits { EIGEN_DEVICE_FUNC static double epsilon() { return __DBL_EPSILON__; } EIGEN_DEVICE_FUNC - static double (max)() { return CUDART_INF; } + static double (max)() { return DBL_MAX; } EIGEN_DEVICE_FUNC - static double (min)() { return __DBL_EPSILON__; } + static double (min)() { return DBL_MIN; } }; template<> struct numeric_limits { From 5b6cff5b0e7dcb7b36b44c61263da7e281800799 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 20:18:00 +0100 Subject: [PATCH 17/26] fix typo --- test/fastmath.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fastmath.cpp b/test/fastmath.cpp index 16462d54f..efdd5b313 100644 --- a/test/fastmath.cpp +++ b/test/fastmath.cpp @@ -18,7 +18,7 @@ void check(bool b, bool ref) std::cout << " BAD "; } -#if EIGEN_COMP_MSVC < 1800 +#if EIGEN_COMP_MSVC && EIGEN_COMP_MSVC < 1800 namespace std { template bool (isfinite)(T x) { return _finite(x); } template bool (isnan)(T x) { return _isnan(x); } From c688cc28d631c84659d5b0931957f8772c55b230 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 20:20:05 +0100 Subject: [PATCH 18/26] fix copy/paste typo --- Eigen/src/Core/MathFunctions.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 515eca137..1820fc1c8 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -875,9 +875,9 @@ template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan( template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isinf)(const long double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isinf)(const double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isinf)(const float& x) { return isinf_msvc_helper(x); } #elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) From 7a5f83ca60c667898e6d8096dac5680793ddfce9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2015 03:55:39 -0700 Subject: [PATCH 19/26] Add overloads for real times sparse operations. This avoids real to complex conversions, and also fixes a compilation issue with MSVC. --- Eigen/src/Core/ArrayBase.h | 4 +--- Eigen/src/Core/DenseBase.h | 15 ++++++--------- Eigen/src/Core/util/XprHelper.h | 8 ++++---- Eigen/src/SparseCore/SparseMatrixBase.h | 16 ++++++++++++++-- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 151c05526..66813c8ea 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -46,15 +46,13 @@ template class ArrayBase typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator*; - typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; typedef DenseBase Base; + using Base::operator*; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; using Base::SizeAtCompileTime; diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 488f15061..e181dafaf 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -40,18 +40,14 @@ static inline void check_DenseIndex_is_signed() { */ template class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real> + : public internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real, + DenseCoeffsBase > #else : public DenseCoeffsBase #endif // not EIGEN_PARSED_BY_DOXYGEN { public: - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator*; - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator/; - /** Inner iterator type to iterate over the coefficients of a row or column. * \sa class InnerIterator @@ -77,9 +73,10 @@ template class DenseBase typedef Scalar value_type; typedef typename NumTraits::Real RealScalar; + typedef internal::special_scalar_op_base > Base; - typedef internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real> Base; + using Base::operator*; + using Base::operator/; using Base::derived; using Base::const_cast_derived; using Base::rows; diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 209c73e1e..f9e2959cc 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -452,9 +452,9 @@ struct generic_xpr_base /** \internal Helper base class to add a scalar multiple operator * overloads for complex types */ -template::value > -struct special_scalar_op_base : public DenseCoeffsBase +struct special_scalar_op_base : public BaseType { // dummy operator* so that the // "using special_scalar_op_base::operator*" compiles @@ -463,8 +463,8 @@ struct special_scalar_op_base : public DenseCoeffsBase void operator/(dummy) const; }; -template -struct special_scalar_op_base : public DenseCoeffsBase +template +struct special_scalar_op_base : public BaseType { const CwiseUnaryOp, Derived> operator*(const OtherScalar& scalar) const diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 38eb1c37a..74b498a47 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -23,7 +23,14 @@ namespace Eigen { * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIXBASE_PLUGIN. */ -template class SparseMatrixBase : public EigenBase +template class SparseMatrixBase +#ifndef EIGEN_PARSED_BY_DOXYGEN + : public internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real, + EigenBase > +#else + : public EigenBase +#endif // not EIGEN_PARSED_BY_DOXYGEN { public: @@ -42,7 +49,12 @@ template class SparseMatrixBase : public EigenBase >::type PacketReturnType; typedef SparseMatrixBase StorageBaseType; - typedef EigenBase Base; + typedef typename NumTraits::Real RealScalar; + typedef internal::special_scalar_op_base > Base; + + using Base::operator*; + using Base::operator/; + typedef Matrix IndexVector; typedef Matrix ScalarVector; From 568d488a2778b6e539a417b1ab2b62d1a096784e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2015 13:16:15 +0100 Subject: [PATCH 20/26] Fusion the two similar specialization of Sparse2Dense Assignment. This change also fixes a compilation issue with MSVC<=2013. --- Eigen/src/SparseCore/SparseAssign.h | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 4b663a59e..cb154d1c2 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -136,9 +136,13 @@ struct Assignment template< typename DstXprType, typename SrcXprType, typename Functor> struct Assignment { + typedef typename DstXprType::Scalar Scalar; static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + if(internal::is_same >::value) + dst.setZero(); internal::evaluator srcEval(src); internal::evaluator dstEval(dst); @@ -149,23 +153,6 @@ struct Assignment } }; -template< typename DstXprType, typename SrcXprType> -struct Assignment, Sparse2Dense> -{ - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { - eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - - dst.setZero(); - internal::evaluator srcEval(src); - internal::evaluator dstEval(dst); - const Index outerEvaluationSize = (internal::evaluator::Flags&RowMajorBit) ? src.rows() : src.cols(); - for (Index j=0; j::InnerIterator i(srcEval,j); i; ++i) - dstEval.coeffRef(i.row(),i.col()) = i.value(); - } -}; - // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error template From 7cfbe35e49fd43d646d2aecf7b93630b3916f2f8 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2015 21:05:52 +0100 Subject: [PATCH 21/26] Fix duplicated declaration --- Eigen/src/SparseCore/SparseMatrixBase.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 74b498a47..ff417302f 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -49,11 +49,6 @@ template class SparseMatrixBase >::type PacketReturnType; typedef SparseMatrixBase StorageBaseType; - typedef typename NumTraits::Real RealScalar; - typedef internal::special_scalar_op_base > Base; - - using Base::operator*; - using Base::operator/; typedef Matrix IndexVector; typedef Matrix ScalarVector; @@ -146,6 +141,10 @@ template class SparseMatrixBase inline Derived& derived() { return *static_cast(this); } inline Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } + + typedef internal::special_scalar_op_base > Base; + using Base::operator*; + using Base::operator/; #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase From 05a0ee25dfcf5f16347e7e8a8903f91b89d48166 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2015 21:06:07 +0100 Subject: [PATCH 22/26] Fix warning. --- doc/special_examples/random_cpp11.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/special_examples/random_cpp11.cpp b/doc/special_examples/random_cpp11.cpp index ccd7c77d0..adc3c110c 100644 --- a/doc/special_examples/random_cpp11.cpp +++ b/doc/special_examples/random_cpp11.cpp @@ -7,7 +7,7 @@ using namespace Eigen; int main() { std::default_random_engine generator; std::poisson_distribution distribution(4.1); - auto poisson = [&] (int) {return distribution(generator);}; + auto poisson = [&] (Eigen::Index) {return distribution(generator);}; RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson ); std::cout << v << "\n"; From ac142773a74120d65b323704a902761a9b7375aa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 13:13:39 -0700 Subject: [PATCH 23/26] Don't call internal::check_rows_cols_for_overflow twice in PlainObjectBase::resize since this is extremely expensive for small arrays --- Eigen/src/Core/PlainObjectBase.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 48e29ebdc..6f1350dc0 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -263,7 +263,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type m_storage.resize(size, rows, cols); if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED #else - internal::check_rows_cols_for_overflow::run(rows, cols); m_storage.resize(rows*cols, rows, cols); #endif } From 0974a579101b50dc87a1a3f39cdaab37b2dcad23 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 15:00:06 -0700 Subject: [PATCH 24/26] Silenced compiler warning --- unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 89e28bdb5..145ca0d64 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -402,7 +402,7 @@ template struct sizes_match_below_dim { - static inline bool run(Dims1& dims1, Dims2& dims2) { + static inline bool run(Dims1&, Dims2&) { return false; } }; From 09ea3a7acdfa1d382de7264b080921205087ce57 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 16:22:52 -0700 Subject: [PATCH 25/26] Silenced a few more compilation warnings --- unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index 96a7d5c20..7cd04a99e 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -354,7 +354,7 @@ struct h_array_reduce template struct h_array_reduce { - constexpr static inline T run(const std::array& arr, T identity) + constexpr static inline T run(const std::array&, T identity) { return identity; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index 62f5ff923..10def5349 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -219,8 +219,8 @@ struct TensorEvaluator, D ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); if (!is_power_of_two) { ComplexScalar pos_j_base = ComplexScalar(std::cos(M_PI/line_len), std::sin(M_PI/line_len)); - for (int i = 0; i < line_len + 1; ++i) { - pos_j_base_powered[i] = std::pow(pos_j_base, i * i); + for (int j = 0; j < line_len + 1; ++j) { + pos_j_base_powered[j] = std::pow(pos_j_base, j * j); } } From c444a0a8c3925ed07dc639259d616e771b28aef0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 16:39:47 -0700 Subject: [PATCH 26/26] Consistently use the same index type in the fft codebase. --- .../Eigen/CXX11/src/Tensor/TensorFFT.h | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index 10def5349..215a4ebad 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -200,7 +200,7 @@ struct TensorEvaluator, D const bool write_to_out = internal::is_same::value; ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); - for (int i = 0; i < m_size; ++i) { + for (Index i = 0; i < m_size; ++i) { buf[i] = MakeComplex::value>()(m_impl.coeff(i)); } @@ -211,15 +211,15 @@ struct TensorEvaluator, D eigen_assert(line_len >= 1); ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); const bool is_power_of_two = isPowerOfTwo(line_len); - const int good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); - const int log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); + const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); + const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); if (!is_power_of_two) { ComplexScalar pos_j_base = ComplexScalar(std::cos(M_PI/line_len), std::sin(M_PI/line_len)); - for (int j = 0; j < line_len + 1; ++j) { + for (Index j = 0; j < line_len + 1; ++j) { pos_j_base_powered[j] = std::pow(pos_j_base, j * j); } } @@ -228,7 +228,7 @@ struct TensorEvaluator, D Index base_offset = getBaseOffsetFromIndex(partial_index, dim); // get data into line_buf - for (int j = 0; j < line_len; ++j) { + for (Index j = 0; j < line_len; ++j) { Index offset = getIndexFromOffset(base_offset, dim, j); line_buf[j] = buf[offset]; } @@ -242,7 +242,7 @@ struct TensorEvaluator, D } // write back - for (int j = 0; j < line_len; ++j) { + for (Index j = 0; j < line_len; ++j) { const ComplexScalar div_factor = (FFTDir == FFT_FORWARD) ? ComplexScalar(1, 0) : ComplexScalar(line_len, 0); Index offset = getIndexFromOffset(base_offset, dim, j); buf[offset] = line_buf[j] / div_factor; @@ -257,45 +257,45 @@ struct TensorEvaluator, D } if(!write_to_out) { - for (int i = 0; i < m_size; ++i) { + for (Index i = 0; i < m_size; ++i) { data[i] = PartOf()(buf[i]); } m_device.deallocate(buf); } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(int x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) { eigen_assert(x > 0); return !(x & (x - 1)); } // The composite number for padding, used in Bluestein's FFT algorithm - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int findGoodComposite(int n) { - int i = 2; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) { + Index i = 2; while (i < 2 * n - 1) i *= 2; return i; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int getLog2(int m) { - int log2m = 0; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) { + Index log2m = 0; while (m >>= 1) log2m++; return log2m; } // Call Cooley Tukey algorithm directly, data length must be power of 2 - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, int line_len, int log_len) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) { eigen_assert(isPowerOfTwo(line_len)); scramble_FFT(line_buf, line_len); compute_1D_Butterfly(line_buf, line_len, log_len); } // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, int line_len, int good_composite, int log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { - int n = line_len; - int m = good_composite; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { + Index n = line_len; + Index m = good_composite; ComplexScalar* data = line_buf; - for (int i = 0; i < n; ++i) { + for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { a[i] = data[i] * std::conj(pos_j_base_powered[i]); } @@ -303,11 +303,11 @@ struct TensorEvaluator, D a[i] = data[i] * pos_j_base_powered[i]; } } - for (int i = n; i < m; ++i) { + for (Index i = n; i < m; ++i) { a[i] = ComplexScalar(0, 0); } - for (int i = 0; i < n; ++i) { + for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { b[i] = pos_j_base_powered[i]; } @@ -315,10 +315,10 @@ struct TensorEvaluator, D b[i] = std::conj(pos_j_base_powered[i]); } } - for (int i = n; i < m - n; ++i) { + for (Index i = n; i < m - n; ++i) { b[i] = ComplexScalar(0, 0); } - for (int i = m - n; i < m; ++i) { + for (Index i = m - n; i < m; ++i) { if(FFTDir == FFT_FORWARD) { b[i] = pos_j_base_powered[m-i]; } @@ -333,7 +333,7 @@ struct TensorEvaluator, D scramble_FFT(b, m); compute_1D_Butterfly(b, m, log_len); - for (int i = 0; i < m; ++i) { + for (Index i = 0; i < m; ++i) { a[i] *= b[i]; } @@ -341,11 +341,11 @@ struct TensorEvaluator, D compute_1D_Butterfly(a, m, log_len); //Do the scaling after ifft - for (int i = 0; i < m; ++i) { + for (Index i = 0; i < m; ++i) { a[i] /= m; } - for (int i = 0; i < n; ++i) { + for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { data[i] = a[i] * std::conj(pos_j_base_powered[i]); } @@ -355,14 +355,14 @@ struct TensorEvaluator, D } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, int n) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) { eigen_assert(isPowerOfTwo(n)); - int j = 1; - for (int i = 1; i < n; ++i){ + Index j = 1; + for (Index i = 1; i < n; ++i){ if (j > i) { std::swap(data[j-1], data[i-1]); } - int m = n >> 1; + Index m = n >> 1; while (m >= 2 && j > m) { j -= m; m >>= 1; @@ -372,7 +372,7 @@ struct TensorEvaluator, D } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, int n, int n_power_of_2) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, Index n, Index n_power_of_2) { eigen_assert(isPowerOfTwo(n)); if (n == 1) { return; @@ -467,7 +467,7 @@ struct TensorEvaluator, D const ComplexScalar wp(wtemp, wpi); ComplexScalar w(1.0, 0.0); - for(int i = 0; i < n/2; i++) { + for(Index i = 0; i < n/2; i++) { ComplexScalar temp(data[i + n/2] * w); data[i + n/2] = data[i] - temp; data[i] += temp; @@ -490,7 +490,7 @@ struct TensorEvaluator, D result += index; } else { - for (int i = 0; i < omitted_dim; ++i) { + for (Index i = 0; i < omitted_dim; ++i) { const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; const Index idx = index / partial_m_stride; index -= idx * partial_m_stride; @@ -508,7 +508,7 @@ struct TensorEvaluator, D } protected: - int m_size; + Index m_size; const FFT& m_fft; Dimensions m_dimensions; array m_strides;