diff --git a/Eigen/Core b/Eigen/Core index b7205bda5..16007d0c9 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -310,6 +310,7 @@ using std::ptrdiff_t; #include "src/Core/arch/NEON/PacketMath.h" #include "src/Core/arch/NEON/MathFunctions.h" #include "src/Core/arch/NEON/Complex.h" + #include "src/Core/arch/NEON/BlockingSizesLookupTables.h" #endif #if defined EIGEN_VECTORIZE_CUDA @@ -383,6 +384,7 @@ using std::ptrdiff_t; #include "src/Core/Inverse.h" #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" +#include "src/Core/products/LookupBlockingSizesTable.h" #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/Parallelizer.h" #include "src/Core/ProductEvaluators.h" diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers index c06668bd2..7fab9eed0 100644 --- a/Eigen/IterativeLinearSolvers +++ b/Eigen/IterativeLinearSolvers @@ -12,24 +12,26 @@ * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse. * Those solvers are accessible via the following classes: * - ConjugateGradient for selfadjoint (hermitian) matrices, + * - LeastSquaresConjugateGradient for rectangular least-square problems, * - BiCGSTAB for general square matrices. * * These iterative solvers are associated with some preconditioners: * - IdentityPreconditioner - not really useful * - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices. - * - IncompleteILUT - incomplete LU factorization with dual thresholding + * - IncompleteLUT - incomplete LU factorization with dual thresholding * * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. * - * \code - * #include - * \endcode + \code + #include + \endcode */ #include "src/IterativeLinearSolvers/SolveWithGuess.h" #include "src/IterativeLinearSolvers/IterativeSolverBase.h" #include "src/IterativeLinearSolvers/BasicPreconditioners.h" #include "src/IterativeLinearSolvers/ConjugateGradient.h" +#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" #include "src/IterativeLinearSolvers/BiCGSTAB.h" #include "src/IterativeLinearSolvers/IncompleteLUT.h" diff --git a/Eigen/Sparse b/Eigen/Sparse index 7cc9c0913..a540f0eec 100644 --- a/Eigen/Sparse +++ b/Eigen/Sparse @@ -11,9 +11,9 @@ * - \ref SparseQR_Module * - \ref IterativeLinearSolvers_Module * - * \code - * #include - * \endcode + \code + #include + \endcode */ #include "SparseCore" diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index f46f7b758..93a726483 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -226,6 +226,11 @@ template class LDLT #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } /** \internal * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. @@ -424,6 +429,8 @@ template struct LDLT_Traits template LDLT& LDLT::compute(const MatrixType& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 629c87161..745b74d95 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -170,6 +170,12 @@ template class LLT #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + /** \internal * Used to compute and store L * The strict upper part is not used and even not initialized. @@ -377,6 +383,8 @@ template struct LLT_Traits template LLT& LLT::compute(const MatrixType& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 9485080d3..ce00566a5 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -647,11 +647,15 @@ struct evaluator > HasNoStride = HasNoInnerStride && HasNoOuterStride, IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + + // TODO: should check for smaller packet types once we can handle multi-sized packet types + AlignBytes = int(packet_traits::size) * sizeof(Scalar), + KeepsPacketAccess = bool(HasNoInnerStride) && ( bool(IsDynamicSize) || HasNoOuterStride || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), + && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ), Flags0 = evaluator::Flags, Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) @@ -717,7 +721,10 @@ struct evaluator > && (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0, + // TODO: should check for smaller packet types once we can handle multi-sized packet types + AlignBytes = int(packet_traits::size) * sizeof(Scalar), + + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | @@ -825,12 +832,16 @@ struct block_evaluator::PlainObject> { typedef Block XprType; + typedef typename XprType::Scalar Scalar; EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) : mapbase_evaluator(block) { + // TODO: should check for smaller packet types once we can handle multi-sized packet types + const int AlignBytes = int(packet_traits::size) * sizeof(Scalar); + EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes) // FIXME this should be an internal assertion - eigen_assert(EIGEN_IMPLIES(evaluator::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); + eigen_assert(EIGEN_IMPLIES(evaluator::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned"); } }; diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 9186f59a7..ab41641f4 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -34,14 +34,35 @@ void check_static_allocation_size() #endif } +template::type, + bool Match = bool((Size%unpacket_traits::size)==0), + bool TryHalf = bool(int(unpacket_traits::size) > Size) + && bool(int(unpacket_traits::size) > int(unpacket_traits::half>::size)) > +struct compute_default_alignment +{ + enum { value = 0 }; +}; + +template +struct compute_default_alignment // Match +{ + enum { value = sizeof(T) * unpacket_traits::size }; +}; + +template +struct compute_default_alignment +{ + // current packet too large, try with an half-packet + enum { value = compute_default_alignment::half>::value }; +}; + /** \internal * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: * to 16 bytes boundary if the total size is a multiple of 16 bytes. */ template + : compute_default_alignment::value > struct plain_array { T array[Size]; @@ -81,14 +102,71 @@ struct plain_array #endif template -struct plain_array +struct plain_array { - EIGEN_USER_ALIGN_DEFAULT T array[Size]; + EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; EIGEN_DEVICE_FUNC plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1); + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); check_static_allocation_size(); } diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 878f38e92..e1b233d82 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -473,48 +473,48 @@ struct random_default_impl }; enum { - floor_log2_terminate, - floor_log2_move_up, - floor_log2_move_down, - floor_log2_bogus + meta_floor_log2_terminate, + meta_floor_log2_move_up, + meta_floor_log2_move_down, + meta_floor_log2_bogus }; -template struct floor_log2_selector +template struct meta_floor_log2_selector { enum { middle = (lower + upper) / 2, - value = (upper <= lower + 1) ? int(floor_log2_terminate) - : (n < (1 << middle)) ? int(floor_log2_move_down) - : (n==0) ? int(floor_log2_bogus) - : int(floor_log2_move_up) + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n==0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) }; }; template::value> -struct floor_log2 {}; + int selector = meta_floor_log2_selector::value> +struct meta_floor_log2 {}; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle>::value }; + enum { value = meta_floor_log2::middle>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle, upper>::value }; + enum { value = meta_floor_log2::middle, upper>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; }; template -struct floor_log2 +struct meta_floor_log2 { // no value, error at compile time }; @@ -522,11 +522,24 @@ struct floor_log2 template struct random_default_impl { - typedef typename NumTraits::NonInteger NonInteger; - static inline Scalar run(const Scalar& x, const Scalar& y) - { - return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1))); + { + using std::max; + using std::min; + typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; + if(y range); + + return Scalar(ScalarX(x) + offset); } static inline Scalar run() @@ -534,7 +547,7 @@ struct random_default_impl #ifdef EIGEN_MAKING_DOCS return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); #else - enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value, + enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, scalar_bits = sizeof(Scalar) * CHAR_BIT, shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), offset = NumTraits::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 0cb117949..ea5a2bd5c 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -105,7 +105,8 @@ struct traits > OuterStrideMatch = Derived::IsVectorAtCompileTime || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits::Flags&AlignedBit)==AlignedBit), - MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch + ScalarTypeMatch = internal::is_same::value, + MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; typedef typename internal::conditional::type type; }; @@ -184,9 +185,11 @@ protected: template class Ref : public RefBase > { + private: typedef internal::traits Traits; template - EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr); + EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); public: typedef RefBase Base; @@ -195,13 +198,15 @@ template class Ref #ifndef EIGEN_PARSED_BY_DOXYGEN template - EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr) + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) { EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); Base::construct(expr.derived()); } template - EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) #else template inline Ref(DenseBase& expr) @@ -228,7 +233,8 @@ template class Ref< EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template - EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) { // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 19749c832..ceed1d1ef 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro(cons } #endif -template<> EIGEN_DEVICE_FUNC inline float4 pgather(const float* from, int stride) { +template<> EIGEN_DEVICE_FUNC inline float4 pgather(const float* from, Index stride) { return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]); } -template<> EIGEN_DEVICE_FUNC inline double2 pgather(const double* from, int stride) { +template<> EIGEN_DEVICE_FUNC inline double2 pgather(const double* from, Index stride) { return make_double2(from[0*stride], from[1*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const float4& from, int stride) { +template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const float4& from, Index stride) { to[stride*0] = from.x; to[stride*1] = from.y; to[stride*2] = from.z; to[stride*3] = from.w; } -template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const double2& from, int stride) { +template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const double2& from, Index stride) { to[stride*0] = from.x; to[stride*1] = from.y; } @@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min(const double2& a) } template<> EIGEN_DEVICE_FUNC inline float4 pabs(const float4& a) { - return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w)); + return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); } template<> EIGEN_DEVICE_FUNC inline double2 pabs(const double2& a) { - return make_double2(abs(a.x), abs(a.y)); + return make_double2(fabs(a.x), fabs(a.y)); } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { double tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; @@ -279,7 +279,7 @@ ptranspose(PacketBlock& kernel) { kernel.packet[3].z = tmp; } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { double tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; diff --git a/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h new file mode 100644 index 000000000..5007c155d --- /dev/null +++ b/Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H +#define EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H + +namespace Eigen { +namespace internal { + +/* The following lookup table was generated from measurements on a Nexus 5, + * which has a Qualcomm Krait 400 CPU. This is very representative of current + * 32bit (ARMv7) Android devices. On the other hand, I don't know how + * representative that is outside of these conditions. Accordingly, + * let's only use this lookup table on ARM 32bit on Android for now. + * + * Measurements were single-threaded, with Scalar=float, compiled with + * -mfpu=neon-vfpv4, so the pmadd instruction used was VFMA.F32. + * + * The device was cooled, allowing it to run a the max clock speed throughout. + * This may not be representative of real-world thermal conditions. + * + * The benchmark attempted to flush caches to test cold-cache performance. + */ +#if EIGEN_ARCH_ARM && EIGEN_OS_ANDROID +template<> +struct BlockingSizesLookupTable { + static const size_t BaseSize = 16; + static const size_t NumSizes = 8; + static const unsigned short* Data() { + static const unsigned short data[512] = { + 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447, + 0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456, + 0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467, + 0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x476, 0x478, + 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x476, 0x476, + 0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x488, + 0x474, 0x475, 0x476, 0x4a6, 0x496, 0x496, 0x495, 0x4a6, + 0x474, 0x475, 0x466, 0x4a6, 0x497, 0x4a5, 0x496, 0x4a5, + 0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b, + 0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576, + 0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587, + 0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x596, 0x597, + 0x574, 0x565, 0x566, 0x596, 0x596, 0x5a6, 0x5a6, 0x5a6, + 0x564, 0x565, 0x5a6, 0x596, 0x5a6, 0x5a6, 0x5a6, 0x5a6, + 0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b, + 0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b, + 0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667, + 0x654, 0x665, 0x676, 0x677, 0x678, 0x679, 0x67a, 0x675, + 0x684, 0x675, 0x686, 0x687, 0x688, 0x688, 0x687, 0x686, + 0x664, 0x685, 0x666, 0x677, 0x697, 0x696, 0x697, 0x697, + 0x664, 0x665, 0x696, 0x696, 0x685, 0x6a6, 0x696, 0x696, + 0x664, 0x675, 0x686, 0x696, 0x6a6, 0x696, 0x696, 0x696, + 0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x747, + 0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757, + 0x764, 0x765, 0x756, 0x767, 0x768, 0x759, 0x75a, 0x766, + 0x744, 0x755, 0x766, 0x777, 0x768, 0x759, 0x778, 0x777, + 0x744, 0x745, 0x766, 0x777, 0x788, 0x786, 0x786, 0x788, + 0x754, 0x755, 0x766, 0x787, 0x796, 0x796, 0x787, 0x796, + 0x684, 0x695, 0x696, 0x6a6, 0x795, 0x786, 0x795, 0x796, + 0x684, 0x695, 0x696, 0x795, 0x786, 0x796, 0x795, 0x796, + 0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x848, 0x848, + 0x844, 0x855, 0x846, 0x847, 0x848, 0x849, 0x855, 0x857, + 0x844, 0x845, 0x846, 0x857, 0x848, 0x859, 0x866, 0x865, + 0x844, 0x855, 0x846, 0x847, 0x878, 0x859, 0x877, 0x877, + 0x844, 0x855, 0x846, 0x867, 0x886, 0x887, 0x885, 0x886, + 0x784, 0x785, 0x786, 0x877, 0x897, 0x885, 0x896, 0x896, + 0x684, 0x695, 0x686, 0x886, 0x885, 0x885, 0x886, 0x896, + 0x694, 0x6a5, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896, + 0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x847, 0x848, + 0x954, 0x855, 0x856, 0x947, 0x858, 0x857, 0x858, 0x858, + 0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x867, 0x867, + 0x944, 0x975, 0x976, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x784, 0x785, 0x886, 0x887, 0x886, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x796, 0x887, 0x897, 0x896, 0x896, + 0x684, 0x695, 0x6a6, 0x886, 0x886, 0x896, 0x896, 0x896, + 0x6a4, 0x6a5, 0x696, 0x896, 0x886, 0x896, 0x896, 0x896, + 0xa44, 0xa45, 0xa46, 0xa47, 0x847, 0x848, 0x847, 0x848, + 0xa44, 0xa45, 0x856, 0x857, 0x857, 0x857, 0x857, 0x857, + 0xa44, 0xa65, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867, + 0x774, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897, + 0x684, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896, + 0x684, 0x6a5, 0x6a5, 0x886, 0x886, 0x896, 0x896, 0x896, + 0xb44, 0x845, 0x846, 0x847, 0x847, 0x945, 0x846, 0x946, + 0xb54, 0x855, 0x856, 0x857, 0x857, 0x856, 0x857, 0x856, + 0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x866, 0x867, + 0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877, + 0x784, 0x885, 0x886, 0x787, 0x887, 0x887, 0x887, 0x887, + 0x784, 0x785, 0x786, 0x796, 0x886, 0x897, 0x897, 0x897, + 0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896, + 0x684, 0x685, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896 + }; + return data; + } +}; +#endif + +} +} + +#endif // EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 154daa7a7..c7fb12fe8 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -272,7 +272,7 @@ ptranspose(PacketBlock& kernel) { } //---------- double ---------- -#if EIGEN_ARCH_ARM64 +#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000); diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 8dd1e1370..ce0abfd80 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -518,7 +518,19 @@ ptranspose(PacketBlock& kernel) { } //---------- double ---------- -#if EIGEN_ARCH_ARM64 + +// Clang 3.5 in the iOS toolchain has an ICE triggered by NEON intrisics for double. +// Confirmed at least with __apple_build_version__ = 6000054. +#ifdef __apple_build_version__ +// Let's hope that by the time __apple_build_version__ hits the 601* range, the bug will be fixed. +// https://gist.github.com/yamaya/2924292 suggests that the 3 first digits are only updated with +// major toolchain updates. +#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000) +#else +#define EIGEN_APPLE_DOUBLE_NEON_BUG 0 +#endif + +#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG #if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__) // Bug 907: workaround missing declarations of the following two functions in the ADK @@ -541,12 +553,12 @@ typedef float64x1_t Packet1d; template<> struct packet_traits : default_packet_traits { typedef Packet2d type; - typedef Packet1d half; + typedef Packet2d half; enum { Vectorizable = 1, AlignedOnScalar = 1, size = 2, - HasHalfPacket=1, + HasHalfPacket=0, HasDiv = 1, // FIXME check the Has* diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index a3b5f50b1..4e03761ea 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -55,6 +55,34 @@ struct functor_traits > }; }; +/** \internal + * \brief Template functor to compute the score of a scalar, to chose a pivot + * + * \sa class CwiseUnaryOp + */ +template struct scalar_score_coeff_op : scalar_abs_op +{ + typedef void Score_is_abs; +}; +template +struct functor_traits > : functor_traits > {}; + +/* Avoid recomputing abs when we know the score and they are the same. Not a true Eigen functor. */ +template struct abs_knowing_score +{ + EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score) + typedef typename NumTraits::Real result_type; + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { using std::abs; return abs(a); } +}; +template struct abs_knowing_score::Score_is_abs> +{ + EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score) + typedef typename NumTraits::Real result_type; + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; } +}; + /** \internal * \brief Template functor to compute the squared absolute value of a scalar * diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 9061fd936..d32377a00 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff return a<=0 ? b : a; } +#if EIGEN_ARCH_i386_OR_x86_64 +const std::ptrdiff_t defaultL1CacheSize = 32*1024; +const std::ptrdiff_t defaultL2CacheSize = 256*1024; +const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024; +#else +const std::ptrdiff_t defaultL1CacheSize = 16*1024; +const std::ptrdiff_t defaultL2CacheSize = 512*1024; +const std::ptrdiff_t defaultL3CacheSize = 512*1024; +#endif + /** \internal */ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) { static bool m_cache_sizes_initialized = false; - static std::ptrdiff_t m_l1CacheSize = 32*1024; - static std::ptrdiff_t m_l2CacheSize = 256*1024; - static std::ptrdiff_t m_l3CacheSize = 2*1024*1024; + static std::ptrdiff_t m_l1CacheSize = 0; + static std::ptrdiff_t m_l2CacheSize = 0; + static std::ptrdiff_t m_l3CacheSize = 0; if(!m_cache_sizes_initialized) { int l1CacheSize, l2CacheSize, l3CacheSize; queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize); - m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024); - m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024); - m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024); + m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize); + m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize); + m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize); m_cache_sizes_initialized = true; } @@ -64,43 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff } } -/** \brief Computes the blocking parameters for a m x k times k x n matrix product - * - * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. - * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. - * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. - * - * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, - * this function computes the blocking size parameters along the respective dimensions - * for matrix products and related algorithms. The blocking sizes depends on various - * parameters: - * - the L1 and L2 cache sizes, - * - the register level blocking sizes defined by gebp_traits, - * - the number of scalars that fit into a packet (when vectorization is enabled). - * - * \sa setCpuCacheSizes */ +/* Helper for computeProductBlockingSizes. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. The blocking sizes depends on various + * parameters: + * - the L1 and L2 cache sizes, + * - the register level blocking sizes defined by gebp_traits, + * - the number of scalars that fit into a packet (when vectorization is enabled). + * + * \sa setCpuCacheSizes */ template -void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) +void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1) { typedef gebp_traits Traits; -#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES - EIGEN_UNUSED_VARIABLE(num_threads); - enum { - kr = 8, - mr = Traits::mr, - nr = Traits::nr - }; - k = std::min(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); - if (k > kr) k -= k % kr; - m = std::min(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); - if (m > mr) m -= m % mr; - n = std::min(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); - if (n > nr) n -= n % nr; - return; -#endif - // Explanations: // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed @@ -155,7 +145,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads // In unit tests we do not want to use extra large matrices, // so we reduce the cache size to check the blocking strategy is not flawed #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS - l1 = 4*1024; + l1 = 9*1024; l2 = 32*1024; l3 = 512*1024; #endif @@ -164,7 +154,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads // Perhaps it would make more sense to consider k*n*m?? // Note that for very tiny problem, this function should be bypassed anyway // because we use the coefficient-based implementation for them. - if(std::max(k,std::max(m,n))<48) + if((std::max)(k,(std::max)(m,n))<48) return; typedef typename Traits::ResScalar ResScalar; @@ -211,8 +201,22 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2. // The second half is implicitly reserved to access the result and lhs coefficients. // When k= Index(Traits::nr*sizeof(RhsScalar))*k) + { + // L1 blocking + max_nc = remaining_l1 / (k*sizeof(RhsScalar)); + } + else + { + // L2 blocking + max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar)); + } // WARNING Below, we assume that Traits::nr is a power of two. Index nc = std::min(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1)); if(n>nc) @@ -228,6 +232,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads { // So far, no blocking at all, i.e., kc==k, and nc==n. // In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2 + // TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete. Index problem_size = k*n*sizeof(LhsScalar); Index actual_lm = actual_l2; Index max_mc = m; @@ -254,6 +259,60 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads } } +inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n) +{ +#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES + if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) { + k = std::min(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K); + m = std::min(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M); + n = std::min(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N); + return true; + } +#else + EIGEN_UNUSED_VARIABLE(k) + EIGEN_UNUSED_VARIABLE(m) + EIGEN_UNUSED_VARIABLE(n) +#endif + return false; +} + +/** \brief Computes the blocking parameters for a m x k times k x n matrix product + * + * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension. + * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension. + * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension. + * + * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar, + * this function computes the blocking size parameters along the respective dimensions + * for matrix products and related algorithms. + * + * The blocking size parameters may be evaluated: + * - either by a heuristic based on cache sizes; + * - or using a precomputed lookup table; + * - or using fixed prescribed values (for testing purposes). + * + * \sa setCpuCacheSizes */ + +template +void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) +{ + if (!useSpecificBlockingSizes(k, m, n)) { + if (!lookupBlockingSizesFromTable(k, m, n, num_threads)) { + evaluateProductBlockingSizesHeuristic(k, m, n, num_threads); + } + } + + typedef gebp_traits Traits; + enum { + kr = 8, + mr = Traits::mr, + nr = Traits::nr + }; + if (k > kr) k -= k % kr; + if (m > mr) m -= m % mr; + if (n > nr) n -= n % nr; +} + template inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1) { @@ -949,19 +1008,31 @@ void gebp_kernel=3*Traits::LhsProgress) - { + { PossiblyRotatingKernelHelper possiblyRotatingKernelHelper(traits); - - // loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth) - for(Index i=0; i(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) )); + for(Index i1=0; i1=2*Traits::LhsProgress) { - // loops on each largest micro horizontal panel of lhs (2*LhsProgress x depth) - for(Index i=peeled_mc3; i(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) )); + + for(Index i1=peeled_mc3; i1 static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) { eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols()); + if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0) + return; typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); diff --git a/Eigen/src/Core/products/LookupBlockingSizesTable.h b/Eigen/src/Core/products/LookupBlockingSizesTable.h new file mode 100644 index 000000000..5ab4525df --- /dev/null +++ b/Eigen/src/Core/products/LookupBlockingSizesTable.h @@ -0,0 +1,89 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H +#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H + +namespace Eigen { + +namespace internal { + +template ::NumSizes != 0 > +struct LookupBlockingSizesFromTableImpl +{ + static bool run(Index&, Index&, Index&, Index) + { + return false; + } +}; + +inline size_t floor_log2_helper(unsigned short& x, size_t offset) +{ + unsigned short y = x >> offset; + if (y) { + x = y; + return offset; + } else { + return 0; + } +} + +inline size_t floor_log2(unsigned short x) +{ + return floor_log2_helper(x, 8) + + floor_log2_helper(x, 4) + + floor_log2_helper(x, 2) + + floor_log2_helper(x, 1); +} + +inline size_t ceil_log2(unsigned short x) +{ + return x > 1 ? floor_log2(x - 1) + 1 : 0; +} + +template +struct LookupBlockingSizesFromTableImpl +{ + static bool run(Index& k, Index& m, Index& n, Index) + { + using std::min; + using std::max; + typedef BlockingSizesLookupTable Table; + const unsigned short minsize = Table::BaseSize; + const unsigned short maxsize = minsize << (Table::NumSizes - 1); + const unsigned short k_clamped = max(minsize, min(k, maxsize)); + const unsigned short m_clamped = max(minsize, min(m, maxsize)); + const unsigned short n_clamped = max(minsize, min(n, maxsize)); + const size_t k_index = ceil_log2(k_clamped / minsize); + const size_t m_index = ceil_log2(m_clamped / minsize); + const size_t n_index = ceil_log2(n_clamped / minsize); + const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index); + const unsigned short table_entry = Table::Data()[index]; + k = min(k, 1 << ((table_entry & 0xf00) >> 8)); + m = min(m, 1 << ((table_entry & 0x0f0) >> 4)); + n = min(n, 1 << ((table_entry & 0x00f) >> 0)); + return true; + } +}; + +template +bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads) +{ + return LookupBlockingSizesFromTableImpl::run(k, m, n, num_threads); +} + +} + +} + +#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 9bfa45106..ffeb5ac5f 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -214,7 +214,7 @@ class blas_data_mapper { } template - EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const { + EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { pscatter(&operator()(i, j), p, m_stride); } diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 0d24beb5a..503d5acdf 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -288,6 +288,14 @@ struct stem_function typedef std::complex::Real> ComplexScalar; typedef ComplexScalar type(ComplexScalar, int); }; + +template +struct BlockingSizesLookupTable +{ + static const size_t NumSizes = 0; +}; + } } // end namespace Eigen diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index aaea9f035..6b294e77f 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -318,6 +318,9 @@ // Defined the boundary (in bytes) on which the data needs to be aligned. Note // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be // aligned at all regardless of the value of this #define. +// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES, +// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough, +// and 16 bytes alignment is also enough for Vector4f. #define EIGEN_ALIGN_BYTES 16 #ifdef EIGEN_DONT_ALIGN diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 528ebe297..562f425bd 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -159,13 +159,16 @@ class compute_matrix_evaluator_flags enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0, is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, + + // TODO: should check for smaller packet types once we can handle multi-sized packet types + align_bytes = int(packet_traits::size) * sizeof(Scalar), aligned_bit = ( ((Options&DontAlign)==0) && ( #if EIGEN_ALIGN_STATICALLY - ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) + ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0)) #else 0 #endif diff --git a/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/Eigen/src/Eigenvalues/ComplexEigenSolver.h index 075a62848..6b010c312 100644 --- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h +++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h @@ -234,6 +234,12 @@ template class ComplexEigenSolver } protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + EigenvectorType m_eivec; EigenvalueType m_eivalues; ComplexSchur m_schur; @@ -251,6 +257,8 @@ template ComplexEigenSolver& ComplexEigenSolver::compute(const MatrixType& matrix, bool computeEigenvectors) { + check_template_parameters(); + // this code is inspired from Jampack eigen_assert(matrix.cols() == matrix.rows()); diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h index a63a42341..167cd99ab 100644 --- a/Eigen/src/Eigenvalues/EigenSolver.h +++ b/Eigen/src/Eigenvalues/EigenSolver.h @@ -299,6 +299,13 @@ template class EigenSolver void doComputeEigenvectors(); protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL); + } + MatrixType m_eivec; EigenvalueType m_eivalues; bool m_isInitialized; @@ -366,6 +373,8 @@ template EigenSolver& EigenSolver::compute(const MatrixType& matrix, bool computeEigenvectors) { + check_template_parameters(); + using std::sqrt; using std::abs; using numext::isfinite; diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index c9da6740a..e2e28cd4a 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -263,6 +263,13 @@ template class GeneralizedEigenSolver } protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + EIGEN_STATIC_ASSERT(!NumTraits::IsComplex, NUMERIC_TYPE_MUST_BE_REAL); + } + MatrixType m_eivec; ComplexVectorType m_alphas; VectorType m_betas; @@ -290,6 +297,8 @@ template GeneralizedEigenSolver& GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors) { + check_template_parameters(); + using std::sqrt; using std::abs; eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows()); diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 66d1154cf..1dcfacf0b 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -347,6 +347,11 @@ template class SelfAdjointEigenSolver static const int m_maxIterations = 30; protected: + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_eivec; RealVectorType m_eivalues; typename TridiagonalizationType::SubDiagonalType m_subdiag; @@ -382,6 +387,8 @@ EIGEN_DEVICE_FUNC SelfAdjointEigenSolver& SelfAdjointEigenSolver ::compute(const MatrixType& matrix, int options) { + check_template_parameters(); + using std::abs; eigen_assert(matrix.cols() == matrix.rows()); eigen_assert((options&~(EigVecMask|GenEigMask))==0 diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index e1ad803bb..e90ce77eb 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -441,7 +441,7 @@ QuaternionBase::operator* (const QuaternionBase& other) c YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return internal::quat_product::Scalar, - internal::traits::IsAligned && internal::traits::IsAligned>::run(*this, other); + (internal::traits::IsAligned && internal::traits::IsAligned)?Aligned:Unaligned>::run(*this, other); } /** \sa operator*(Quaternion) */ @@ -646,6 +646,16 @@ inline Quaternion::Scalar> QuaternionBase struct quat_conj +{ + static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& q){ + return Quaternion(q.w(),-q.x(),-q.y(),-q.z()); + } +}; +} + /** \returns the conjugate of the \c *this which is equal to the multiplicative inverse * if the quaternion is normalized. * The conjugate of a quaternion represents the opposite rotation. @@ -656,7 +666,10 @@ template inline Quaternion::Scalar> QuaternionBase::conjugate() const { - return Quaternion(this->w(),-this->x(),-this->y(),-this->z()); + return internal::quat_conj::Scalar, + internal::traits::IsAligned?Aligned:Unaligned>::run(*this); + } /** \returns the angle (in radian) between two rotations @@ -667,12 +680,10 @@ template inline typename internal::traits::Scalar QuaternionBase::angularDistance(const QuaternionBase& other) const { - using std::acos; + using std::atan2; using std::abs; - Scalar d = abs(this->dot(other)); - if (d>=Scalar(1)) - return Scalar(0); - return Scalar(2) * acos(d); + Quaternion d = (*this) * other.conjugate(); + return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) ); } diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index 3d8284f2d..e59c32c56 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -20,23 +20,35 @@ struct quat_product { static inline Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { - const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000)); Quaternion res; + const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); __m128 a = _a.coeffs().template packet(0); __m128 b = _b.coeffs().template packet(0); - __m128 flip1 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a,1,2,0,2), - vec4f_swizzle1(b,2,0,1,2)),mask); - __m128 flip2 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a,3,3,3,1), - vec4f_swizzle1(b,0,1,2,1)),mask); + __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); + __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); pstore(&res.x(), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), vec4f_swizzle1(b,1,2,0,0))), - _mm_add_ps(flip1,flip2))); + _mm_xor_ps(mask,_mm_add_ps(s1,s2)))); + return res; } }; +template +struct quat_conj +{ + static inline Quaternion run(const QuaternionBase& q) + { + Quaternion res; + const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); + pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet(0))); + return res; + } +}; + + template struct cross3_impl { @@ -56,8 +68,8 @@ struct cross3_impl -template -struct quat_product +template +struct quat_product { static inline Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { @@ -66,8 +78,8 @@ struct quat_product Quaternion res; const double* a = _a.coeffs().data(); - Packet2d b_xy = _b.coeffs().template packet(0); - Packet2d b_zw = _b.coeffs().template packet(2); + Packet2d b_xy = _b.coeffs().template packet(0); + Packet2d b_zw = _b.coeffs().template packet(2); Packet2d a_xx = pset1(a[0]); Packet2d a_yy = pset1(a[1]); Packet2d a_zz = pset1(a[2]); @@ -108,6 +120,20 @@ struct quat_product } }; +template +struct quat_conj +{ + static inline Quaternion run(const QuaternionBase& q) + { + Quaternion res; + const __m128d mask0 = _mm_setr_pd(-0.,-0.); + const __m128d mask2 = _mm_setr_pd(-0.,0.); + pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet(0))); + pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet(2))); + return res; + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index a09f81225..3710a8209 100644 --- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -17,9 +17,9 @@ namespace Eigen { * * This class allows to approximately solve for A.x = b problems assuming A is a diagonal matrix. * In other words, this preconditioner neglects all off diagonal entries and, in Eigen's language, solves for: - * \code - * A.diagonal().asDiagonal() . x = b - * \endcode + \code + A.diagonal().asDiagonal() . x = b + \endcode * * \tparam _Scalar the type of the scalar. * @@ -28,6 +28,7 @@ namespace Eigen { * * \note A variant that has yet to be implemented would attempt to preserve the norm of each column. * + * \sa class LeastSquareDiagonalPreconditioner, class ConjugateGradient */ template class DiagonalPreconditioner @@ -100,6 +101,69 @@ class DiagonalPreconditioner bool m_isInitialized; }; +/** \ingroup IterativeLinearSolvers_Module + * \brief Jacobi preconditioner for LeastSquaresConjugateGradient + * + * This class allows to approximately solve for A' A x = A' b problems assuming A' A is a diagonal matrix. + * In other words, this preconditioner neglects all off diagonal entries and, in Eigen's language, solves for: + \code + (A.adjoint() * A).diagonal().asDiagonal() * x = b + \endcode + * + * \tparam _Scalar the type of the scalar. + * + * The diagonal entries are pre-inverted and stored into a dense vector. + * + * \sa class LeastSquaresConjugateGradient, class DiagonalPreconditioner + */ +template +class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> +{ + typedef _Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef DiagonalPreconditioner<_Scalar> Base; + using Base::m_invdiag; + public: + + LeastSquareDiagonalPreconditioner() : Base() {} + + template + explicit LeastSquareDiagonalPreconditioner(const MatType& mat) : Base() + { + compute(mat); + } + + template + LeastSquareDiagonalPreconditioner& analyzePattern(const MatType& ) + { + return *this; + } + + template + LeastSquareDiagonalPreconditioner& factorize(const MatType& mat) + { + // Compute the inverse squared-norm of each column of mat + m_invdiag.resize(mat.cols()); + for(Index j=0; j0) + m_invdiag(j) = RealScalar(1)/sum; + else + m_invdiag(j) = RealScalar(1); + } + Base::m_isInitialized = true; + return *this; + } + + template + LeastSquareDiagonalPreconditioner& compute(const MatType& mat) + { + return factorize(mat); + } + + protected: +}; /** \ingroup IterativeLinearSolvers_Module * \brief A naive preconditioner which approximates any matrix as the identity matrix diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index a799c3ef5..9e7dd1404 100644 --- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -60,29 +60,29 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, } VectorType p(n); - p = precond.solve(residual); //initial search direction + p = precond.solve(residual); // initial search direction VectorType z(n), tmp(n); RealScalar absNew = numext::real(residual.dot(p)); // the square of the absolute value of r scaled by invM Index i = 0; while(i < maxIters) { - tmp.noalias() = mat * p; // the bottleneck of the algorithm + tmp.noalias() = mat * p; // the bottleneck of the algorithm - Scalar alpha = absNew / p.dot(tmp); // the amount we travel on dir - x += alpha * p; // update solution - residual -= alpha * tmp; // update residue + Scalar alpha = absNew / p.dot(tmp); // the amount we travel on dir + x += alpha * p; // update solution + residual -= alpha * tmp; // update residual residualNorm2 = residual.squaredNorm(); if(residualNorm2 < threshold) break; - z = precond.solve(residual); // approximately solve for "A z = residual" + z = precond.solve(residual); // approximately solve for "A z = residual" RealScalar absOld = absNew; absNew = numext::real(residual.dot(z)); // update the absolute value of r - RealScalar beta = absNew / absOld; // calculate the Gram-Schmidt value used to create the new search direction - p = z + beta * p; // update search direction + RealScalar beta = absNew / absOld; // calculate the Gram-Schmidt value used to create the new search direction + p = z + beta * p; // update search direction i++; } tol_error = sqrt(residualNorm2 / rhsNorm2); @@ -122,24 +122,24 @@ struct traits > * and NumTraits::epsilon() for the tolerance. * * This class can be used as the direct solver classes. Here is a typical usage example: - * \code - * int n = 10000; - * VectorXd x(n), b(n); - * SparseMatrix A(n,n); - * // fill A and b - * ConjugateGradient > cg; - * cg.compute(A); - * x = cg.solve(b); - * std::cout << "#iterations: " << cg.iterations() << std::endl; - * std::cout << "estimated error: " << cg.error() << std::endl; - * // update b, and solve again - * x = cg.solve(b); - * \endcode + \code + int n = 10000; + VectorXd x(n), b(n); + SparseMatrix A(n,n); + // fill A and b + ConjugateGradient > cg; + cg.compute(A); + x = cg.solve(b); + std::cout << "#iterations: " << cg.iterations() << std::endl; + std::cout << "estimated error: " << cg.error() << std::endl; + // update b, and solve again + x = cg.solve(b); + \endcode * * By default the iterations start with x=0 as an initial guess of the solution. * One can control the start using the solveWithGuess() method. * - * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner + * \sa class LeastSquaresConjugateGradient, class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, int _UpLo, typename _Preconditioner> class ConjugateGradient : public IterativeSolverBase > @@ -185,7 +185,7 @@ public: { typedef typename internal::conditional&, - SparseSelfAdjointView, UpLo> + typename Ref::template ConstSelfAdjointViewReturnType::Type >::type MatrixWrapperType; m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; @@ -208,7 +208,7 @@ public: template void _solve_impl(const MatrixBase& b, Dest& x) const { - x.setOnes(); + x.setZero(); _solve_with_guess_impl(b.derived(),x); } diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index 6d63d45e4..b7f8debb3 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -93,21 +93,23 @@ Index QuickSplit(VectorV &row, VectorI &ind, Index ncut) * alternatively, on GMANE: * http://comments.gmane.org/gmane.comp.lib.eigen/3302 */ -template -class IncompleteLUT : public SparseSolverBase > +template +class IncompleteLUT : public SparseSolverBase > { protected: - typedef SparseSolverBase > Base; + typedef SparseSolverBase Base; using Base::m_isInitialized; public: typedef _Scalar Scalar; + typedef _StorageIndex StorageIndex; typedef typename NumTraits::Real RealScalar; typedef Matrix Vector; - typedef SparseMatrix FactorType; - typedef SparseMatrix PermutType; - typedef typename FactorType::StorageIndex StorageIndex; + typedef Matrix VectorI; + typedef SparseMatrix FactorType; public: + + // this typedef is only to export the scalar type and compile-time dimensions to solve_retval typedef Matrix MatrixType; IncompleteLUT() @@ -151,7 +153,7 @@ class IncompleteLUT : public SparseSolverBase > * **/ template - IncompleteLUT& compute(const MatrixType& amat) + IncompleteLUT& compute(const MatrixType& amat) { analyzePattern(amat); factorize(amat); @@ -197,8 +199,8 @@ protected: * Set control parameter droptol * \param droptol Drop any element whose magnitude is less than this tolerance **/ -template -void IncompleteLUT::setDroptol(const RealScalar& droptol) +template +void IncompleteLUT::setDroptol(const RealScalar& droptol) { this->m_droptol = droptol; } @@ -207,15 +209,15 @@ void IncompleteLUT::setDroptol(const RealScalar& droptol) * Set control parameter fillfactor * \param fillfactor This is used to compute the number @p fill_in of largest elements to keep on each row. **/ -template -void IncompleteLUT::setFillfactor(int fillfactor) +template +void IncompleteLUT::setFillfactor(int fillfactor) { this->m_fillfactor = fillfactor; } -template +template template -void IncompleteLUT::analyzePattern(const _MatrixType& amat) +void IncompleteLUT::analyzePattern(const _MatrixType& amat) { // Compute the Fill-reducing permutation SparseMatrix mat1 = amat; @@ -232,9 +234,9 @@ void IncompleteLUT::analyzePattern(const _MatrixType& amat) m_analysisIsOk = true; } -template +template template -void IncompleteLUT::factorize(const _MatrixType& amat) +void IncompleteLUT::factorize(const _MatrixType& amat) { using std::sqrt; using std::swap; @@ -246,8 +248,8 @@ void IncompleteLUT::factorize(const _MatrixType& amat) m_lu.resize(n,n); // Declare Working vectors and variables Vector u(n) ; // real values of the row -- maximum size is n -- - VectorXi ju(n); // column position of the values in u -- maximum size is n - VectorXi jr(n); // Indicate the position of the nonzero elements in the vector u -- A zero location is indicated by -1 + VectorI ju(n); // column position of the values in u -- maximum size is n + VectorI jr(n); // Indicate the position of the nonzero elements in the vector u -- A zero location is indicated by -1 // Apply the fill-reducing permutation eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); @@ -398,7 +400,7 @@ void IncompleteLUT::factorize(const _MatrixType& amat) sizel = len; len = (std::min)(sizel, nnzL); typename Vector::SegmentReturnType ul(u.segment(0, sizel)); - typename VectorXi::SegmentReturnType jul(ju.segment(0, sizel)); + typename VectorI::SegmentReturnType jul(ju.segment(0, sizel)); internal::QuickSplit(ul, jul, len); // store the largest m_fill elements of the L part @@ -427,14 +429,13 @@ void IncompleteLUT::factorize(const _MatrixType& amat) sizeu = len + 1; // +1 to take into account the diagonal element len = (std::min)(sizeu, nnzU); typename Vector::SegmentReturnType uu(u.segment(ii+1, sizeu-1)); - typename VectorXi::SegmentReturnType juu(ju.segment(ii+1, sizeu-1)); + typename VectorI::SegmentReturnType juu(ju.segment(ii+1, sizeu-1)); internal::QuickSplit(uu, juu, len); // store the largest elements of the U part for(Index k = ii + 1; k < ii + len; k++) m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k); } - m_lu.finalize(); m_lu.makeCompressed(); diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h index 46bc0ac78..6477b9de2 100644 --- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h @@ -52,9 +52,9 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - template - explicit IterativeSolverBase(const SparseMatrixBase& A) - : mp_matrix(A) + template + explicit IterativeSolverBase(const EigenBase& A) + : mp_matrix(A.derived()) { init(); compute(mp_matrix); @@ -67,8 +67,8 @@ public: * Currently, this function mostly calls analyzePattern on the preconditioner. In the future * we might, for instance, implement column reordering for faster matrix vector products. */ - template - Derived& analyzePattern(const SparseMatrixBase& A) + template + Derived& analyzePattern(const EigenBase& A) { grab(A.derived()); m_preconditioner.analyzePattern(mp_matrix); @@ -87,8 +87,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - template - Derived& factorize(const SparseMatrixBase& A) + template + Derived& factorize(const EigenBase& A) { eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); grab(A.derived()); @@ -108,8 +108,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - template - Derived& compute(const SparseMatrixBase& A) + template + Derived& compute(const EigenBase& A) { grab(A.derived()); m_preconditioner.compute(mp_matrix); @@ -223,11 +223,11 @@ protected: m_tolerance = NumTraits::epsilon(); } - template - void grab(const SparseMatrixBase &A) + template + void grab(const EigenBase &A) { mp_matrix.~Ref(); - ::new (&mp_matrix) Ref(A); + ::new (&mp_matrix) Ref(A.derived()); } void grab(const Ref &A) diff --git a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h new file mode 100644 index 000000000..1d819927e --- /dev/null +++ b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h @@ -0,0 +1,213 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H +#define EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H + +namespace Eigen { + +namespace internal { + +/** \internal Low-level conjugate gradient algorithm for least-square problems + * \param mat The matrix A + * \param rhs The right hand side vector b + * \param x On input and initial solution, on output the computed solution. + * \param precond A preconditioner being able to efficiently solve for an + * approximation of A'Ax=b (regardless of b) + * \param iters On input the max number of iteration, on output the number of performed iterations. + * \param tol_error On input the tolerance error, on output an estimation of the relative error. + */ +template +EIGEN_DONT_INLINE +void least_square_conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, + const Preconditioner& precond, Index& iters, + typename Dest::RealScalar& tol_error) +{ + using std::sqrt; + using std::abs; + typedef typename Dest::RealScalar RealScalar; + typedef typename Dest::Scalar Scalar; + typedef Matrix VectorType; + + RealScalar tol = tol_error; + Index maxIters = iters; + + Index m = mat.rows(), n = mat.cols(); + + VectorType residual = rhs - mat * x; + VectorType normal_residual = mat.adjoint() * residual; + + RealScalar rhsNorm2 = (mat.adjoint()*rhs).squaredNorm(); + if(rhsNorm2 == 0) + { + x.setZero(); + iters = 0; + tol_error = 0; + return; + } + RealScalar threshold = tol*tol*rhsNorm2; + RealScalar residualNorm2 = normal_residual.squaredNorm(); + if (residualNorm2 < threshold) + { + iters = 0; + tol_error = sqrt(residualNorm2 / rhsNorm2); + return; + } + + VectorType p(n); + p = precond.solve(normal_residual); // initial search direction + + VectorType z(n), tmp(m); + RealScalar absNew = numext::real(normal_residual.dot(p)); // the square of the absolute value of r scaled by invM + Index i = 0; + while(i < maxIters) + { + tmp.noalias() = mat * p; + + Scalar alpha = absNew / tmp.squaredNorm(); // the amount we travel on dir + x += alpha * p; // update solution + residual -= alpha * tmp; // update residual + normal_residual = mat.adjoint() * residual; // update residual of the normal equation + + residualNorm2 = normal_residual.squaredNorm(); + if(residualNorm2 < threshold) + break; + + z = precond.solve(normal_residual); // approximately solve for "A'A z = normal_residual" + + RealScalar absOld = absNew; + absNew = numext::real(normal_residual.dot(z)); // update the absolute value of r + RealScalar beta = absNew / absOld; // calculate the Gram-Schmidt value used to create the new search direction + p = z + beta * p; // update search direction + i++; + } + tol_error = sqrt(residualNorm2 / rhsNorm2); + iters = i; +} + +} + +template< typename _MatrixType, + typename _Preconditioner = LeastSquareDiagonalPreconditioner > +class LeastSquaresConjugateGradient; + +namespace internal { + +template< typename _MatrixType, typename _Preconditioner> +struct traits > +{ + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; +}; + +} + +/** \ingroup IterativeLinearSolvers_Module + * \brief A conjugate gradient solver for sparse (or dense) least-square problems + * + * This class allows to solve for A x = b linear problems using an iterative conjugate gradient algorithm. + * The matrix A can be non symmetric and rectangular, but the matrix A' A should be positive-definite to guaranty stability. + * Otherwise, the SparseLU or SparseQR classes might be preferable. + * The matrix A and the vectors x and b can be either dense or sparse. + * + * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix. + * \tparam _Preconditioner the type of the preconditioner. Default is LeastSquareDiagonalPreconditioner + * + * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() + * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations + * and NumTraits::epsilon() for the tolerance. + * + * This class can be used as the direct solver classes. Here is a typical usage example: + \code + int m=1000000, n = 10000; + VectorXd x(n), b(m); + SparseMatrix A(m,n); + // fill A and b + LeastSquaresConjugateGradient > lscg; + lscg.compute(A); + x = lscg.solve(b); + std::cout << "#iterations: " << lscg.iterations() << std::endl; + std::cout << "estimated error: " << lscg.error() << std::endl; + // update b, and solve again + x = lscg.solve(b); + \endcode + * + * By default the iterations start with x=0 as an initial guess of the solution. + * One can control the start using the solveWithGuess() method. + * + * \sa class ConjugateGradient, SparseLU, SparseQR + */ +template< typename _MatrixType, typename _Preconditioner> +class LeastSquaresConjugateGradient : public IterativeSolverBase > +{ + typedef IterativeSolverBase Base; + using Base::mp_matrix; + using Base::m_error; + using Base::m_iterations; + using Base::m_info; + using Base::m_isInitialized; +public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + +public: + + /** Default constructor. */ + LeastSquaresConjugateGradient() : Base() {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + * + * This constructor is a shortcut for the default constructor followed + * by a call to compute(). + * + * \warning this class stores a reference to the matrix A as well as some + * precomputed values that depend on it. Therefore, if \a A is changed + * this class becomes invalid. Call compute() to update it with the new + * matrix A, or modify a copy of A. + */ + explicit LeastSquaresConjugateGradient(const MatrixType& A) : Base(A) {} + + ~LeastSquaresConjugateGradient() {} + + /** \internal */ + template + void _solve_with_guess_impl(const Rhs& b, Dest& x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + + for(Index j=0; j + void _solve_impl(const MatrixBase& b, Dest& x) const + { + x.setZero(); + _solve_with_guess_impl(b.derived(),x); + } + +}; + +} // end namespace Eigen + +#endif // EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 49c8b183d..75dbc16b0 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -390,6 +390,12 @@ template class FullPivLU #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_lu; PermutationPType m_p; PermutationQType m_q; @@ -434,6 +440,8 @@ FullPivLU::FullPivLU(const MatrixType& matrix) template FullPivLU& FullPivLU::compute(const MatrixType& matrix) { + check_template_parameters(); + // the permutations are stored as int indices, so just to be sure: eigen_assert(matrix.rows()<=NumTraits::highest() && matrix.cols()<=NumTraits::highest()); @@ -459,14 +467,16 @@ FullPivLU& FullPivLU::compute(const MatrixType& matrix) // biggest coefficient in the remaining bottom-right corner (starting at row k, col k) Index row_of_biggest_in_corner, col_of_biggest_in_corner; - RealScalar biggest_in_corner; + typedef internal::scalar_score_coeff_op Scoring; + typedef typename Scoring::result_type Score; + Score biggest_in_corner; biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k) - .cwiseAbs() + .unaryExpr(Scoring()) .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner); row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner, col_of_biggest_in_corner += k; // need to add k to them. - if(biggest_in_corner==RealScalar(0)) + if(biggest_in_corner==Score(0)) { // before exiting, make sure to initialize the still uninitialized transpositions // in a sane state without destroying what we already have. @@ -479,7 +489,8 @@ FullPivLU& FullPivLU::compute(const MatrixType& matrix) break; } - if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner; + RealScalar abs_pivot = internal::abs_knowing_score()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner); + if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot; // Now that we've found the pivot, we need to apply the row/col swaps to // bring it to the location (k,k). diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index e57b36bc5..019fc4230 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -209,6 +209,12 @@ template class PartialPivLU #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_lu; PermutationType m_p; TranspositionType m_rowsTranspositions; @@ -275,6 +281,8 @@ struct partial_lu_impl */ static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions) { + typedef scalar_score_coeff_op Scoring; + typedef typename Scoring::result_type Score; const Index rows = lu.rows(); const Index cols = lu.cols(); const Index size = (std::min)(rows,cols); @@ -286,13 +294,13 @@ struct partial_lu_impl Index rcols = cols-k-1; Index row_of_biggest_in_col; - RealScalar biggest_in_corner - = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col); + Score biggest_in_corner + = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col); row_of_biggest_in_col += k; row_transpositions[k] = PivIndex(row_of_biggest_in_col); - if(biggest_in_corner != RealScalar(0)) + if(biggest_in_corner != Score(0)) { if(k != row_of_biggest_in_col) { @@ -423,6 +431,8 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t template PartialPivLU& PartialPivLU::compute(const MatrixType& matrix) { + check_template_parameters(); + // the row permutation is stored as int indices, so just to be sure: eigen_assert(matrix.rows()::highest()); diff --git a/Eigen/src/OrderingMethods/Amd.h b/Eigen/src/OrderingMethods/Amd.h index 3d2981f0c..63d996cb4 100644 --- a/Eigen/src/OrderingMethods/Amd.h +++ b/Eigen/src/OrderingMethods/Amd.h @@ -137,22 +137,27 @@ void minimum_degree_ordering(SparseMatrix& C, Perm degree[i] = len[i]; // degree of node i } mark = internal::cs_wclear(0, 0, w, n); /* clear w */ - elen[n] = -2; /* n is a dead element */ - Cp[n] = -1; /* n is a root of assembly tree */ - w[n] = 0; /* n is a dead element */ /* --- Initialize degree lists ------------------------------------------ */ for(i = 0; i < n; i++) { + bool has_diag = false; + for(p = Cp[i]; p dense) /* node i is dense */ + else if(d > dense || !has_diag) /* node i is dense or has no structural diagonal element */ { nv[i] = 0; /* absorb i into element n */ elen[i] = -1; /* node i is dead */ @@ -168,6 +173,10 @@ void minimum_degree_ordering(SparseMatrix& C, Perm } } + elen[n] = -2; /* n is a dead element */ + Cp[n] = -1; /* n is a root of assembly tree */ + w[n] = 0; /* n is a dead element */ + while (nel < n) /* while (selecting pivots) do */ { /* --- Select node of minimum approximate degree -------------------- */ diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index e88e637a4..cb838d04a 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -90,11 +90,11 @@ class AMDOrdering * \note Returns an empty permutation matrix * \tparam Index The type of indices of the matrix */ -template +template class NaturalOrdering { public: - typedef PermutationMatrix PermutationType; + typedef PermutationMatrix PermutationType; /** Compute the permutation vector from a column-major sparse matrix */ template diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 03ff0a8f2..7b3842cbe 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -398,6 +398,12 @@ template class ColPivHouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; PermutationType m_colsPermutation; @@ -436,6 +442,8 @@ typename MatrixType::RealScalar ColPivHouseholderQR::logAbsDetermina template ColPivHouseholderQR& ColPivHouseholderQR::compute(const MatrixType& matrix) { + check_template_parameters(); + using std::abs; Index rows = matrix.rows(); Index cols = matrix.cols(); diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index 7d5e58d2f..4c2c958a8 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -380,6 +380,12 @@ template class FullPivHouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; IntDiagSizeVectorType m_rows_transpositions; @@ -419,6 +425,8 @@ typename MatrixType::RealScalar FullPivHouseholderQR::logAbsDetermin template FullPivHouseholderQR& FullPivHouseholderQR::compute(const MatrixType& matrix) { + check_template_parameters(); + using std::abs; Index rows = matrix.rows(); Index cols = matrix.cols(); @@ -443,13 +451,15 @@ FullPivHouseholderQR& FullPivHouseholderQR::compute(cons for (Index k = 0; k < size; ++k) { Index row_of_biggest_in_corner, col_of_biggest_in_corner; - RealScalar biggest_in_corner; + typedef internal::scalar_score_coeff_op Scoring; + typedef typename Scoring::result_type Score; - biggest_in_corner = m_qr.bottomRightCorner(rows-k, cols-k) - .cwiseAbs() - .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner); + Score score = m_qr.bottomRightCorner(rows-k, cols-k) + .unaryExpr(Scoring()) + .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner); row_of_biggest_in_corner += k; col_of_biggest_in_corner += k; + RealScalar biggest_in_corner = internal::abs_knowing_score()(m_qr(row_of_biggest_in_corner, col_of_biggest_in_corner), score); if(k==0) biggest = biggest_in_corner; // if the corner is negligible, then we have less than full rank, and we can finish early diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 195bacb85..878654be5 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -196,6 +196,12 @@ template class HouseholderQR #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + MatrixType m_qr; HCoeffsType m_hCoeffs; RowVectorType m_temp; @@ -348,6 +354,8 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c template HouseholderQR& HouseholderQR::compute(const MatrixType& matrix) { + check_template_parameters(); + Index rows = matrix.rows(); Index cols = matrix.cols(); Index size = (std::min)(rows,cols); diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index 8903755e7..ad191085e 100644 --- a/Eigen/src/SVD/SVDBase.h +++ b/Eigen/src/SVD/SVDBase.h @@ -130,9 +130,10 @@ public: inline Index rank() const { using std::abs; + using std::max; eigen_assert(m_isInitialized && "JacobiSVD is not initialized."); if(m_singularValues.size()==0) return 0; - RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold(); + RealScalar premultiplied_threshold = (max)(m_singularValues.coeff(0) * threshold(), (std::numeric_limits::min)()); Index i = m_nonzeroSingularValues-1; while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i; return i+1; @@ -217,6 +218,12 @@ public: #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + // return true if already allocated bool allocate(Index rows, Index cols, unsigned int computationOptions) ; @@ -240,7 +247,9 @@ protected: m_usePrescribedThreshold(false), m_computationOptions(0), m_rows(-1), m_cols(-1), m_diagSize(0) - {} + { + check_template_parameters(); + } }; diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h index a0815e708..f56298e8c 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -69,6 +69,7 @@ class SimplicialCholeskyBase : public SparseSolverBase typedef SparseMatrix CholMatrixType; typedef CholMatrixType const * ConstCholMatrixPtr; typedef Matrix VectorType; + typedef Matrix VectorI; public: @@ -250,8 +251,8 @@ class SimplicialCholeskyBase : public SparseSolverBase CholMatrixType m_matrix; VectorType m_diag; // the diagonal coefficients (LDLT mode) - VectorXi m_parent; // elimination tree - VectorXi m_nonZerosPerCol; + VectorI m_parent; // elimination tree + VectorI m_nonZerosPerCol; PermutationMatrix m_P; // the permutation PermutationMatrix m_Pinv; // the inverse permutation diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index cae9c38b0..52c7da297 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -86,7 +86,12 @@ class CompressedStorage void resize(Index size, double reserveSizeFactor = 0) { if (m_allocatedSize)(NumTraits::highest(), size + Index(reserveSizeFactor*double(size))); + if(realloc_size newValues(size); internal::scoped_array newIndices(size); diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index acd82e926..2b31716a3 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -49,6 +49,16 @@ public: return nnz; } + inline const Scalar coeff(Index row, Index col) const + { + return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); + } + + inline const Scalar coeff(Index index) const + { + return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart); + } + inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; } Index startRow() const { return IsRowMajor ? m_outerStart : 0; } Index startCol() const { return IsRowMajor ? 0 : m_outerStart; } @@ -204,6 +214,21 @@ public: } bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; } + + inline Scalar& coeffRef(Index row, Index col) + { + return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); + } + + inline const Scalar coeff(Index row, Index col) const + { + return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); + } + + inline const Scalar coeff(Index index) const + { + return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart); + } const Scalar& lastCoeff() const { diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 4562f3df9..4c3a47959 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -222,24 +222,18 @@ class SparseMatrix * The non zero coefficient must \b not already exist. * * If the matrix \c *this is in compressed mode, then \c *this is turned into uncompressed - * mode while reserving room for 2 non zeros per inner vector. It is strongly recommended to first - * call reserve(const SizesType &) to reserve a more appropriate number of elements per - * inner vector that better match your scenario. + * mode while reserving room for 2 x this->innerSize() non zeros if reserve(Index) has not been called earlier. + * In this case, the insertion procedure is optimized for a \e sequential insertion mode where elements are assumed to be + * inserted by increasing outer-indices. + * + * If that's not the case, then it is strongly recommended to either use a triplet-list to assemble the matrix, or to first + * call reserve(const SizesType &) to reserve the appropriate number of non-zero elements per inner vector. * - * This function performs a sorted insertion in O(1) if the elements of each inner vector are - * inserted in increasing inner index order, and in O(nnz_j) for a random insertion. + * Assuming memory has been appropriately reserved, this function performs a sorted insertion in O(1) + * if the elements of each inner vector are inserted in increasing inner index order, and in O(nnz_j) for a random insertion. * */ - Scalar& insert(Index row, Index col) - { - eigen_assert(row>=0 && row=0 && col& SparseMatrix +typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insert(Index row, Index col) +{ + eigen_assert(row>=0 && row=0 && col(std::malloc(m_outerSize * sizeof(StorageIndex))); + if(!m_innerNonZeros) internal::throw_std_bad_alloc(); + + memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex)); + + // pack all inner-vectors to the end of the pre-allocated space + // and allocate the entire free-space to the first inner-vector + StorageIndex end = convert_index(m_data.allocatedSize()); + for(Index j=1; j<=m_outerSize; ++j) + m_outerIndex[j] = end; + } + } + + // check whether we can do a fast "push back" insertion + Index data_end = m_data.allocatedSize(); + + // First case: we are filling a new inner vector which is packed at the end. + // We assume that all remaining inner-vectors are also empty and packed to the end. + if(m_outerIndex[outer]==data_end) + { + eigen_internal_assert(m_innerNonZeros[outer]==0); + + // pack previous empty inner-vectors to end of the used-space + // and allocate the entire free-space to the current inner-vector. + StorageIndex p = convert_index(m_data.size()); + Index j = outer; + while(j>=0 && m_innerNonZeros[j]==0) + m_outerIndex[j--] = p; + + // push back the new element + ++m_innerNonZeros[outer]; + m_data.append(Scalar(0), inner); + + // check for reallocation + if(data_end != m_data.allocatedSize()) + { + // m_data has been reallocated + // -> move remaining inner-vectors back to the end of the free-space + // so that the entire free-space is allocated to the current inner-vector. + eigen_internal_assert(data_end < m_data.allocatedSize()); + StorageIndex new_end = convert_index(m_data.allocatedSize()); + for(Index j=outer+1; j<=m_outerSize; ++j) + if(m_outerIndex[j]==data_end) + m_outerIndex[j] = new_end; + } + return m_data.value(p); + } + + // Second case: the next inner-vector is packed to the end + // and the current inner-vector end match the used-space. + if(m_outerIndex[outer+1]==data_end && m_outerIndex[outer]+m_innerNonZeros[outer]==m_data.size()) + { + eigen_internal_assert(outer+1==m_outerSize || m_innerNonZeros[outer+1]==0); + + // add space for the new element + ++m_innerNonZeros[outer]; + m_data.resize(m_data.size()+1); + + // check for reallocation + if(data_end != m_data.allocatedSize()) + { + // m_data has been reallocated + // -> move remaining inner-vectors back to the end of the free-space + // so that the entire free-space is allocated to the current inner-vector. + eigen_internal_assert(data_end < m_data.allocatedSize()); + StorageIndex new_end = convert_index(m_data.allocatedSize()); + for(Index j=outer+1; j<=m_outerSize; ++j) + if(m_outerIndex[j]==data_end) + m_outerIndex[j] = new_end; + } + + // and insert it at the right position (sorted insertion) + Index startId = m_outerIndex[outer]; + Index p = m_outerIndex[outer]+m_innerNonZeros[outer]-1; + while ( (p > startId) && (m_data.index(p-1) > inner) ) + { + m_data.index(p) = m_data.index(p-1); + m_data.value(p) = m_data.value(p-1); + --p; + } + + m_data.index(p) = convert_index(inner); + return (m_data.value(p) = 0); + } + + if(m_data.size() != m_data.allocatedSize()) + { + // make sure the matrix is compatible to random un-compressed insertion: + m_data.resize(m_data.allocatedSize()); + this->reserveInnerVectors(Array::Constant(2*m_outerSize, convert_index(m_outerSize))); + } + + return insertUncompressed(row,col); +} + template EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertUncompressed(Index row, Index col) { diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index d76dfa33d..55b0ad9d2 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -97,7 +97,6 @@ template class SparseMatrixBase : public EigenBase Transpose >::type AdjointReturnType; typedef Transpose TransposeReturnType; - template struct SelfAdjointViewReturnType { typedef SelfAdjointView Type; }; typedef typename internal::add_const >::type ConstTransposeReturnType; // FIXME storage order do not match evaluator storage order @@ -300,9 +299,14 @@ template class SparseMatrixBase : public EigenBase template inline const TriangularView triangularView() const; + + template struct SelfAdjointViewReturnType { typedef SparseSelfAdjointView Type; }; + template struct ConstSelfAdjointViewReturnType { typedef const SparseSelfAdjointView Type; }; - template inline const SparseSelfAdjointView selfadjointView() const; - template inline SparseSelfAdjointView selfadjointView(); + template inline + typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + template inline + typename SelfAdjointViewReturnType::Type selfadjointView(); template Scalar dot(const MatrixBase& other) const; template Scalar dot(const SparseMatrixBase& other) const; diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 6467d4894..3da856799 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -169,17 +169,17 @@ template class SparseSelfAdjointView ***************************************************************************/ template -template -const SparseSelfAdjointView SparseMatrixBase::selfadjointView() const +template +typename SparseMatrixBase::template ConstSelfAdjointViewReturnType::Type SparseMatrixBase::selfadjointView() const { - return SparseSelfAdjointView(derived()); + return SparseSelfAdjointView(derived()); } template -template -SparseSelfAdjointView SparseMatrixBase::selfadjointView() +template +typename SparseMatrixBase::template SelfAdjointViewReturnType::Type SparseMatrixBase::selfadjointView() { - return SparseSelfAdjointView(derived()); + return SparseSelfAdjointView(derived()); } /*************************************************************************** diff --git a/bench/analyze-blocking-sizes.cpp b/bench/analyze-blocking-sizes.cpp index a603c0216..d563a1d2d 100644 --- a/bench/analyze-blocking-sizes.cpp +++ b/bench/analyze-blocking-sizes.cpp @@ -1,3 +1,12 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #include #include #include @@ -7,23 +16,76 @@ #include #include #include +#include +#include + +#include using namespace std; +const int default_precision = 4; + +// see --only-cubic-sizes +bool only_cubic_sizes = false; + +// see --dump-tables +bool dump_tables = false; + +uint8_t log2_pot(size_t x) { + size_t l = 0; + while (x >>= 1) l++; + return l; +} + +uint16_t compact_size_triple(size_t k, size_t m, size_t n) +{ + return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n); +} + +// just a helper to store a triple of K,M,N sizes for matrix product +struct size_triple_t +{ + uint16_t k, m, n; + size_triple_t() : k(0), m(0), n(0) {} + size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {} + size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {} + size_triple_t(uint16_t compact) + { + k = 1 << ((compact & 0xf00) >> 8); + m = 1 << ((compact & 0x0f0) >> 4); + n = 1 << ((compact & 0x00f) >> 0); + } + bool is_cubic() const { return k == m && m == n; } +}; + +ostream& operator<<(ostream& s, const size_triple_t& t) +{ + return s << "(" << t.k << ", " << t.m << ", " << t.n << ")"; +} + struct inputfile_entry_t { uint16_t product_size; - uint16_t block_size; + uint16_t pot_block_size; + size_triple_t nonpot_block_size; float gflops; }; struct inputfile_t { + enum class type_t { + unknown, + all_pot_sizes, + default_sizes + }; + string filename; vector entries; + type_t type; inputfile_t(const string& fname) : filename(fname) + , type(type_t::unknown) { ifstream stream(filename); if (!stream.is_open()) { @@ -31,52 +93,102 @@ struct inputfile_t exit(1); } string line; - bool is_in_measurements = false; while (getline(stream, line)) { if (line.empty()) continue; - if (line.find("BEGIN MEASUREMENTS") == 0) { - is_in_measurements = true; + if (line.find("BEGIN MEASUREMENTS ALL POT SIZES") == 0) { + if (type != type_t::unknown) { + cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines"; + exit(1); + } + type = type_t::all_pot_sizes; continue; } - - if (!is_in_measurements) { + if (line.find("BEGIN MEASUREMENTS DEFAULT SIZES") == 0) { + if (type != type_t::unknown) { + cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines"; + exit(1); + } + type = type_t::default_sizes; continue; } + - unsigned int product_size, block_size; - float gflops; - int sscanf_result = - sscanf(line.c_str(), "%x %x %f", - &product_size, - &block_size, - &gflops); - if (3 != sscanf_result || - !product_size || - product_size > 0xfff || - !block_size || - block_size > 0xfff || - !isfinite(gflops)) - { - cerr << "ill-formed input file: " << filename << endl; - cerr << "offending line:" << endl << line << endl; - exit(1); + if (type == type_t::unknown) { + continue; + } + switch(type) { + case type_t::all_pot_sizes: { + unsigned int product_size, block_size; + float gflops; + int sscanf_result = + sscanf(line.c_str(), "%x %x %f", + &product_size, + &block_size, + &gflops); + if (3 != sscanf_result || + !product_size || + product_size > 0xfff || + !block_size || + block_size > 0xfff || + !isfinite(gflops)) + { + cerr << "ill-formed input file: " << filename << endl; + cerr << "offending line:" << endl << line << endl; + exit(1); + } + if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) { + continue; + } + inputfile_entry_t entry; + entry.product_size = uint16_t(product_size); + entry.pot_block_size = uint16_t(block_size); + entry.gflops = gflops; + entries.push_back(entry); + break; + } + case type_t::default_sizes: { + unsigned int product_size; + float gflops; + int bk, bm, bn; + int sscanf_result = + sscanf(line.c_str(), "%x default(%d, %d, %d) %f", + &product_size, + &bk, &bm, &bn, + &gflops); + if (5 != sscanf_result || + !product_size || + product_size > 0xfff || + !isfinite(gflops)) + { + cerr << "ill-formed input file: " << filename << endl; + cerr << "offending line:" << endl << line << endl; + exit(1); + } + if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) { + continue; + } + inputfile_entry_t entry; + entry.product_size = uint16_t(product_size); + entry.pot_block_size = 0; + entry.nonpot_block_size = size_triple_t(bk, bm, bn); + entry.gflops = gflops; + entries.push_back(entry); + break; + } + + default: + break; } - inputfile_entry_t entry; - entry.product_size = uint16_t(product_size); - entry.block_size = uint16_t(block_size); - entry.gflops = gflops; - entries.push_back(entry); } stream.close(); - if (!is_in_measurements) { - cerr << "Input file " << filename << " didn't contain a BEGIN MEASUREMENTS line. Wrong file?" << endl; + if (type == type_t::unknown) { + cerr << "Unrecognized input file " << filename << endl; exit(1); } if (entries.empty()) { cerr << "didn't find any measurements in input file: " << filename << endl; exit(1); } - //cerr << "read " << entries.size() << " measurements from " << filename << endl; } }; @@ -88,6 +200,11 @@ struct preprocessed_inputfile_entry_t float efficiency; }; +bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) +{ + return e1.efficiency < e2.efficiency; +} + struct preprocessed_inputfile_t { string filename; @@ -96,6 +213,9 @@ struct preprocessed_inputfile_t preprocessed_inputfile_t(const inputfile_t& inputfile) : filename(inputfile.filename) { + if (inputfile.type != inputfile_t::type_t::all_pot_sizes) { + abort(); + } auto it = inputfile.entries.begin(); auto it_first_with_given_product_size = it; while (it != inputfile.entries.end()) { @@ -127,7 +247,7 @@ private: for (auto it = begin; it != end; ++it) { preprocessed_inputfile_entry_t entry; entry.product_size = it->product_size; - entry.block_size = it->block_size; + entry.block_size = it->pot_block_size; entry.efficiency = it->gflops / max_gflops; entries.push_back(entry); } @@ -201,14 +321,82 @@ float efficiency_of_subset( efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry); } efficiency = min(efficiency, efficiency_this_product_size); - first_entry_index_with_this_product_size = entry_index; - product_size = first_file.entries[entry_index].product_size; + if (entry_index < num_entries) { + first_entry_index_with_this_product_size = entry_index; + product_size = first_file.entries[entry_index].product_size; + } } } return efficiency; } +void dump_table_for_subset( + const vector& preprocessed_inputfiles, + const vector& subset) +{ + const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]]; + const size_t num_entries = first_file.entries.size(); + size_t entry_index = 0; + size_t first_entry_index_with_this_product_size = 0; + uint16_t product_size = first_file.entries[0].product_size; + size_t i = 0; + size_triple_t min_product_size(first_file.entries.front().product_size); + size_triple_t max_product_size(first_file.entries.back().product_size); + if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) { + abort(); + } + if (only_cubic_sizes) { + cerr << "Can't generate tables with --only-cubic-sizes." << endl; + abort(); + } + cout << "struct LookupTable {" << endl; + cout << " static const size_t BaseSize = " << min_product_size.k << ";" << endl; + const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1; + const size_t TableSize = NumSizes * NumSizes * NumSizes; + cout << " static const size_t NumSizes = " << NumSizes << ";" << endl; + cout << " static const unsigned short* Data() {" << endl; + cout << " static const unsigned short data[" << TableSize << "] = {"; + while (entry_index < num_entries) { + ++entry_index; + if (entry_index == num_entries || + first_file.entries[entry_index].product_size != product_size) + { + float best_efficiency_this_product_size = 0.0f; + uint16_t best_block_size_this_product_size = 0; + for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) { + float efficiency_this_entry = 1.0f; + for (auto i = subset.begin(); i != subset.end(); ++i) { + efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency); + } + if (efficiency_this_entry > best_efficiency_this_product_size) { + best_efficiency_this_product_size = efficiency_this_entry; + best_block_size_this_product_size = first_file.entries[e].block_size; + } + } + if ((i++) % NumSizes) { + cout << " "; + } else { + cout << endl << " "; + } + cout << "0x" << hex << best_block_size_this_product_size << dec; + if (entry_index < num_entries) { + cout << ","; + first_entry_index_with_this_product_size = entry_index; + product_size = first_file.entries[entry_index].product_size; + } + } + } + if (i != TableSize) { + cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl; + abort(); + } + cout << endl << " };" << endl; + cout << " return data;" << endl; + cout << " }" << endl; + cout << "};" << endl; +} + float efficiency_of_partition( const vector& preprocessed_inputfiles, const vector>& partition) @@ -390,67 +578,299 @@ void print_partition( for (auto file = subset->begin(); file != subset->end(); ++file) { cout << " " << preprocessed_inputfiles[*file].filename << endl; } + if (dump_tables) { + cout << " Table:" << endl; + dump_table_for_subset(preprocessed_inputfiles, *subset); + } } cout << endl; } -int main(int argc, char* argv[]) +struct action_t { - if (argc == 1) { - cerr << "usage: " << argv[0] << " [input files]" << endl; - cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; - exit(1); - } - cout.precision(3); - cerr.precision(3); - vector inputfilenames; - for (int i = 1; i < argc; i++) { - inputfilenames.emplace_back(argv[i]); - } + virtual const char* invokation_name() const { abort(); return nullptr; } + virtual void run(const vector&) const { abort(); } + virtual ~action_t() {} +}; - vector preprocessed_inputfiles; - for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) { - preprocessed_inputfiles.emplace_back(inputfile_t(*it)); - } - - check_all_files_in_same_exact_order(preprocessed_inputfiles); - - float required_efficiency_to_beat = 0.0f; - vector>> partitions; - cerr << "searching for partitions...\r" << flush; - while (true) +struct partition_action_t : action_t +{ + virtual const char* invokation_name() const override { return "partition"; } + virtual void run(const vector& input_filenames) const override { - vector> partition; - find_partition_with_efficiency_higher_than( - preprocessed_inputfiles, - required_efficiency_to_beat, - partition); - float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); - cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() - << " subsets for " << 100.0f * actual_efficiency - << " % efficiency" - << " \r" << flush; - partitions.push_back(partition); - if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) { - break; + vector preprocessed_inputfiles; + + if (input_filenames.empty()) { + cerr << "The " << invokation_name() << " action needs a list of input files." << endl; + exit(1); } - required_efficiency_to_beat = actual_efficiency; - } - cerr << " " << endl; - while (true) { - bool repeat = false; - for (size_t i = 0; i < partitions.size() - 1; i++) { - if (partitions[i].size() >= partitions[i+1].size()) { - partitions.erase(partitions.begin() + i); - repeat = true; + + for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) { + inputfile_t inputfile(*it); + switch (inputfile.type) { + case inputfile_t::type_t::all_pot_sizes: + preprocessed_inputfiles.emplace_back(inputfile); + break; + case inputfile_t::type_t::default_sizes: + cerr << "The " << invokation_name() << " action only uses measurements for all pot sizes, and " + << "has no use for " << *it << " which contains measurements for default sizes." << endl; + exit(1); + break; + default: + cerr << "Unrecognized input file: " << *it << endl; + exit(1); + } + } + + check_all_files_in_same_exact_order(preprocessed_inputfiles); + + float required_efficiency_to_beat = 0.0f; + vector>> partitions; + cerr << "searching for partitions...\r" << flush; + while (true) + { + vector> partition; + find_partition_with_efficiency_higher_than( + preprocessed_inputfiles, + required_efficiency_to_beat, + partition); + float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); + cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() + << " subsets for " << 100.0f * actual_efficiency + << " % efficiency" + << " \r" << flush; + partitions.push_back(partition); + if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) { + break; + } + required_efficiency_to_beat = actual_efficiency; + } + cerr << " " << endl; + while (true) { + bool repeat = false; + for (size_t i = 0; i < partitions.size() - 1; i++) { + if (partitions[i].size() >= partitions[i+1].size()) { + partitions.erase(partitions.begin() + i); + repeat = true; + break; + } + } + if (!repeat) { break; } } - if (!repeat) { - break; + for (auto it = partitions.begin(); it != partitions.end(); ++it) { + print_partition(preprocessed_inputfiles, *it); } } - for (auto it = partitions.begin(); it != partitions.end(); ++it) { - print_partition(preprocessed_inputfiles, *it); +}; + +struct evaluate_defaults_action_t : action_t +{ + struct results_entry_t { + uint16_t product_size; + size_triple_t default_block_size; + uint16_t best_pot_block_size; + float default_gflops; + float best_pot_gflops; + float default_efficiency; + }; + friend ostream& operator<<(ostream& s, const results_entry_t& entry) + { + return s + << "Product size " << size_triple_t(entry.product_size) + << ": default block size " << entry.default_block_size + << " -> " << entry.default_gflops + << " GFlop/s = " << entry.default_efficiency * 100.0f << " %" + << " of best POT block size " << size_triple_t(entry.best_pot_block_size) + << " -> " << entry.best_pot_gflops + << " GFlop/s" << dec; } + static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) { + return e1.default_efficiency < e2.default_efficiency; + } + virtual const char* invokation_name() const override { return "evaluate-defaults"; } + void show_usage_and_exit() const + { + cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl; + cerr << "checks how well the performance with default sizes compares to the best " + << "performance measured over all POT sizes." << endl; + exit(1); + } + virtual void run(const vector& input_filenames) const override + { + if (input_filenames.size() != 2) { + show_usage_and_exit(); + } + inputfile_t inputfile_default_sizes(input_filenames[0]); + inputfile_t inputfile_all_pot_sizes(input_filenames[1]); + if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) { + cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl; + show_usage_and_exit(); + } + if (inputfile_all_pot_sizes.type != inputfile_t::type_t::all_pot_sizes) { + cerr << inputfile_all_pot_sizes.filename << " is not an input file with all POT sizes." << endl; + show_usage_and_exit(); + } + vector results; + vector cubic_results; + + uint16_t product_size = 0; + auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin(); + for (auto it_default_sizes = inputfile_default_sizes.entries.begin(); + it_default_sizes != inputfile_default_sizes.entries.end(); + ++it_default_sizes) + { + if (it_default_sizes->product_size == product_size) { + continue; + } + product_size = it_default_sizes->product_size; + while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() && + it_all_pot_sizes->product_size != product_size) + { + ++it_all_pot_sizes; + } + if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) { + break; + } + uint16_t best_pot_block_size = 0; + float best_pot_gflops = 0; + for (auto it = it_all_pot_sizes; + it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size; + ++it) + { + if (it->gflops > best_pot_gflops) { + best_pot_gflops = it->gflops; + best_pot_block_size = it->pot_block_size; + } + } + results_entry_t entry; + entry.product_size = product_size; + entry.default_block_size = it_default_sizes->nonpot_block_size; + entry.best_pot_block_size = best_pot_block_size; + entry.default_gflops = it_default_sizes->gflops; + entry.best_pot_gflops = best_pot_gflops; + entry.default_efficiency = entry.default_gflops / entry.best_pot_gflops; + results.push_back(entry); + + size_triple_t t(product_size); + if (t.k == t.m && t.m == t.n) { + cubic_results.push_back(entry); + } + } + + cout << "All results:" << endl; + for (auto it = results.begin(); it != results.end(); ++it) { + cout << *it << endl; + } + cout << endl; + + sort(results.begin(), results.end(), lower_efficiency); + + const size_t n = min(20, results.size()); + cout << n << " worst results:" << endl; + for (size_t i = 0; i < n; i++) { + cout << results[i] << endl; + } + cout << endl; + + cout << "cubic results:" << endl; + for (auto it = cubic_results.begin(); it != cubic_results.end(); ++it) { + cout << *it << endl; + } + cout << endl; + + sort(cubic_results.begin(), cubic_results.end(), lower_efficiency); + + cout.precision(2); + vector a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f}; + for (auto it = a.begin(); it != a.end(); ++it) { + size_t n = min(results.size() - 1, size_t(*it * results.size())); + cout << (100.0f * n / (results.size() - 1)) + << " % of product sizes have default efficiency <= " + << 100.0f * results[n].default_efficiency << " %" << endl; + } + cout.precision(default_precision); + } +}; + + +void show_usage_and_exit(int argc, char* argv[], + const vector>& available_actions) +{ + cerr << "usage: " << argv[0] << " [options...] " << endl; + cerr << "available actions:" << endl; + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + cerr << " " << (*it)->invokation_name() << endl; + } + cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; + exit(1); +} + +int main(int argc, char* argv[]) +{ + cout.precision(default_precision); + cerr.precision(default_precision); + + vector> available_actions; + available_actions.emplace_back(new partition_action_t); + available_actions.emplace_back(new evaluate_defaults_action_t); + + vector input_filenames; + + action_t* action = nullptr; + + if (argc < 2) { + show_usage_and_exit(argc, argv, available_actions); + } + for (int i = 1; i < argc; i++) { + bool arg_handled = false; + // Step 1. Try to match action invokation names. + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + if (!strcmp(argv[i], (*it)->invokation_name())) { + if (!action) { + action = it->get(); + arg_handled = true; + break; + } else { + cerr << "can't specify more than one action!" << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + } + if (arg_handled) { + continue; + } + // Step 2. Try to match option names. + if (argv[i][0] == '-') { + if (!strcmp(argv[i], "--only-cubic-sizes")) { + only_cubic_sizes = true; + arg_handled = true; + } + if (!strcmp(argv[i], "--dump-tables")) { + dump_tables = true; + arg_handled = true; + } + if (!arg_handled) { + cerr << "Unrecognized option: " << argv[i] << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + if (arg_handled) { + continue; + } + // Step 3. Default to interpreting args as input filenames. + input_filenames.emplace_back(argv[i]); + } + + if (dump_tables && only_cubic_sizes) { + cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl; + show_usage_and_exit(argc, argv, available_actions); + } + + if (!action) { + show_usage_and_exit(argc, argv, available_actions); + } + + action->run(input_filenames); } diff --git a/bench/benchmark-blocking-sizes.cpp b/bench/benchmark-blocking-sizes.cpp index 04244575a..827be2880 100644 --- a/bench/benchmark-blocking-sizes.cpp +++ b/bench/benchmark-blocking-sizes.cpp @@ -1,11 +1,23 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #include #include #include #include #include +#include +#include +bool eigen_use_specific_block_size; int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n; -#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n @@ -31,11 +43,15 @@ const float min_accurate_time = 1e-2f; // See --min-working-set-size command line parameter. size_t min_working_set_size = 0; +float max_clock_speed = 0.0f; + // range of sizes that we will benchmark (in all 3 K,M,N dimensions) const size_t maxsize = 2048; const size_t minsize = 16; typedef MatrixXf MatrixType; +typedef MatrixType::Scalar Scalar; +typedef internal::packet_traits::type Packet; static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two"); static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two"); @@ -82,16 +98,26 @@ struct benchmark_t { uint16_t compact_product_size; uint16_t compact_block_size; + bool use_default_block_size; float gflops; benchmark_t() : compact_product_size(0) , compact_block_size(0) + , use_default_block_size(false) , gflops(0) - {} + { + } benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn) : compact_product_size(compact_size_triple(pk, pm, pn)) , compact_block_size(compact_size_triple(bk, bm, bn)) + , use_default_block_size(false) + , gflops(0) + {} + benchmark_t(size_t pk, size_t pm, size_t pn) + : compact_product_size(compact_size_triple(pk, pm, pn)) + , compact_block_size(0) + , use_default_block_size(true) , gflops(0) {} @@ -100,10 +126,15 @@ struct benchmark_t ostream& operator<<(ostream& s, const benchmark_t& b) { - s << hex; - s << b.compact_product_size - << " " << b.compact_block_size; - s << dec; + s << hex << b.compact_product_size << dec; + if (b.use_default_block_size) { + size_triple_t t(b.compact_product_size); + Index k = t.k, m = t.m, n = t.n; + internal::computeProductBlockingSizes(k, m, n); + s << " default(" << k << ", " << m << ", " << n << ")"; + } else { + s << " " << hex << b.compact_block_size << dec; + } s << " " << b.gflops; return s; } @@ -121,19 +152,23 @@ bool operator<(const benchmark_t& b1, const benchmark_t& b2) void benchmark_t::run() { - // expand our compact benchmark params into proper triples size_triple_t productsizes(compact_product_size); - size_triple_t blocksizes(compact_block_size); - - // feed eigen with our custom blocking params - eigen_block_size_k = blocksizes.k; - eigen_block_size_m = blocksizes.m; - eigen_block_size_n = blocksizes.n; + + if (use_default_block_size) { + eigen_use_specific_block_size = false; + } else { + // feed eigen with our custom blocking params + eigen_use_specific_block_size = true; + size_triple_t blocksizes(compact_block_size); + eigen_block_size_k = blocksizes.k; + eigen_block_size_m = blocksizes.m; + eigen_block_size_n = blocksizes.n; + } // set up the matrix pool const size_t combined_three_matrices_sizes = - sizeof(MatrixType::Scalar) * + sizeof(Scalar) * (productsizes.k * productsizes.m + productsizes.k * productsizes.n + productsizes.m * productsizes.n); @@ -168,7 +203,7 @@ void benchmark_t::run() double starttime = timer.getCpuTime(); for (int i = 0; i < iters_at_a_time; i++) { - dst[matrix_index] = lhs[matrix_index] * rhs[matrix_index]; + dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index]; matrix_index++; if (matrix_index == matrix_pool_size) { matrix_index = 0; @@ -231,9 +266,23 @@ string type_name() return "double"; } -void show_usage_and_exit(const char *progname) +struct action_t { - cerr << "usage: " << progname << " [--min-working-set-size=N]" << endl << endl; + virtual const char* invokation_name() const { abort(); return nullptr; } + virtual void run() const { abort(); } + virtual ~action_t() {} +}; + +void show_usage_and_exit(int /*argc*/, char* argv[], + const vector>& available_actions) +{ + cerr << "usage: " << argv[0] << " [options...]" << endl << endl; + cerr << "available actions:" << endl << endl; + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + cerr << " " << (*it)->invokation_name() << endl; + } + cerr << endl; + cerr << "options:" << endl << endl; cerr << " --min-working-set-size=N:" << endl; cerr << " Set the minimum working set size to N bytes." << endl; cerr << " This is rounded up as needed to a multiple of matrix size." << endl; @@ -244,93 +293,254 @@ void show_usage_and_exit(const char *progname) cerr << " avoid warm caches." << endl; exit(1); } - -int main(int argc, char* argv[]) + +float measure_clock_speed() { - for (int i = 1; i < argc; i++) { - if (argv[i] == strstr(argv[i], "--min-working-set-size=")) { - const char* equals_sign = strchr(argv[i], '='); - min_working_set_size = strtoul(equals_sign+1, nullptr, 10); - } else { - cerr << "unrecognized option: " << argv[i] << endl << endl; - show_usage_and_exit(argv[0]); - } + cerr << "Measuring clock speed... \r" << flush; + + vector all_gflops; + for (int i = 0; i < 8; i++) { + benchmark_t b(1024, 1024, 1024); + b.run(); + all_gflops.push_back(b.gflops); } - cout.precision(4); + sort(all_gflops.begin(), all_gflops.end()); + float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5]; - print_cpuinfo(); + // multiply by an arbitrary constant to discourage trying doing anything with the + // returned values besides just comparing them with each other. + float result = stable_estimate * 123.456f; - cout << "benchmark parameters:" << endl; - cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl; - cout << "scalar type: " << type_name() << endl; - cout << "packet size: " << internal::packet_traits::size << endl; - cout << "minsize = " << minsize << endl; - cout << "maxsize = " << maxsize << endl; - cout << "measurement_repetitions = " << measurement_repetitions << endl; - cout << "min_accurate_time = " << min_accurate_time << endl; - cout << "min_working_set_size = " << min_working_set_size; - if (min_working_set_size == 0) { - cout << " (try to outsize caches)"; + return result; +} + +struct human_duration_t +{ + int seconds; + human_duration_t(int s) : seconds(s) {} +}; + +ostream& operator<<(ostream& s, const human_duration_t& d) +{ + int remainder = d.seconds; + if (remainder > 3600) { + int hours = remainder / 3600; + s << hours << " h "; + remainder -= hours * 3600; } - cout << endl << endl; + if (remainder > 60) { + int minutes = remainder / 60; + s << minutes << " min "; + remainder -= minutes * 60; + } + if (d.seconds < 600) { + s << remainder << " s"; + } + return s; +} +const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data"; - // assemble the array of benchmarks without running them at first - vector benchmarks; - for (int repetition = 0; repetition < measurement_repetitions; repetition++) { - for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { - for (size_t msize = minsize; msize <= maxsize; msize *= 2) { - for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { - for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) { - for (size_t mblock = minsize; mblock <= msize; mblock *= 2) { - for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) { - benchmark_t b(ksize, msize, nsize, kblock, mblock, nblock); - benchmarks.push_back(b); - } - } +void serialize_benchmarks(const char* filename, const vector& benchmarks, size_t first_benchmark_to_run) +{ + FILE* file = fopen(filename, "w"); + if (!file) { + cerr << "Could not open file " << filename << " for writing." << endl; + cerr << "Do you have write permissions on the current working directory?" << endl; + exit(1); + } + size_t benchmarks_vector_size = benchmarks.size(); + fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file); + fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file); + fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file); + fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file); + fclose(file); +} + +bool deserialize_benchmarks(const char* filename, vector& benchmarks, size_t& first_benchmark_to_run) +{ + FILE* file = fopen(filename, "r"); + if (!file) { + return false; + } + if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) { + return false; + } + size_t benchmarks_vector_size = 0; + if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) { + return false; + } + if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) { + return false; + } + benchmarks.resize(benchmarks_vector_size); + if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) { + return false; + } + unlink(filename); + return true; +} + +void try_run_some_benchmarks( + vector& benchmarks, + double time_start, + size_t& first_benchmark_to_run) +{ + if (first_benchmark_to_run == benchmarks.size()) { + return; + } + + double time_last_progress_update = 0; + double time_last_clock_speed_measurement = 0; + double time_now = 0; + + size_t benchmark_index = first_benchmark_to_run; + + while (true) { + float ratio_done = float(benchmark_index) / benchmarks.size(); + time_now = timer.getRealTime(); + + // We check clock speed every minute and at the end. + if (benchmark_index == benchmarks.size() || + time_now > time_last_clock_speed_measurement + 60.0f) + { + time_last_clock_speed_measurement = time_now; + + // Ensure that clock speed is as expected + float current_clock_speed = measure_clock_speed(); + + // The tolerance needs to be smaller than the relative difference between + // clock speeds that a device could operate under. + // It seems unlikely that a device would be throttling clock speeds by + // amounts smaller than 2%. + // With a value of 1%, I was getting within noise on a Sandy Bridge. + const float clock_speed_tolerance = 0.02f; + + if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) { + // Clock speed is now higher than we previously measured. + // Either our initial measurement was inaccurate, which won't happen + // too many times as we are keeping the best clock speed value and + // and allowing some tolerance; or something really weird happened, + // which invalidates all benchmark results collected so far. + // Either way, we better restart all over again now. + if (benchmark_index) { + cerr << "Restarting at " << 100.0f * ratio_done + << " % because clock speed increased. " << endl; + } + max_clock_speed = current_clock_speed; + first_benchmark_to_run = 0; + return; + } + + bool rerun_last_tests = false; + + if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { + cerr << "Measurements completed so far: " + << 100.0f * ratio_done + << " % " << endl; + cerr << "Clock speed seems to be only " + << current_clock_speed/max_clock_speed + << " times what it used to be." << endl; + + unsigned int seconds_to_sleep_if_lower_clock_speed = 1; + + while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { + if (seconds_to_sleep_if_lower_clock_speed > 32) { + cerr << "Sleeping longer probably won't make a difference." << endl; + cerr << "Serializing benchmarks to " << session_filename << endl; + serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run); + cerr << "Now restart this benchmark, and it should pick up where we left." << endl; + exit(2); } + rerun_last_tests = true; + cerr << "Sleeping " + << seconds_to_sleep_if_lower_clock_speed + << " s... \r" << endl; + sleep(seconds_to_sleep_if_lower_clock_speed); + current_clock_speed = measure_clock_speed(); + seconds_to_sleep_if_lower_clock_speed *= 2; } } + + if (rerun_last_tests) { + cerr << "Redoing the last " + << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size() + << " % because clock speed had been low. " << endl; + return; + } + + // nothing wrong with the clock speed so far, so there won't be a need to rerun + // benchmarks run so far in case we later encounter a lower clock speed. + first_benchmark_to_run = benchmark_index; } - } - // randomly shuffling benchmarks allows us to get accurate enough progress info, - // as now the cheap/expensive benchmarks are randomly mixed so they average out. - random_shuffle(benchmarks.begin(), benchmarks.end()); + if (benchmark_index == benchmarks.size()) { + // We're done! + first_benchmark_to_run = benchmarks.size(); + // Erase progress info + cerr << " " << endl; + return; + } - // timings here are only used to display progress info. - // Whence the use of real time. - double time_start = timer.getRealTime(); - double time_last_progress_update = time_start; - for (size_t i = 0; i < benchmarks.size(); i++) { // Display progress info on stderr - double time_now = timer.getRealTime(); if (time_now > time_last_progress_update + 1.0f) { time_last_progress_update = time_now; - float ratio_done = float(i) / benchmarks.size(); - cerr.precision(3); cerr << "Measurements... " << 100.0f * ratio_done - << " %"; - - if (i > 10) { - cerr << ", ETA "; - float eta = float(time_now - time_start) * (1.0f - ratio_done) / ratio_done; - if (eta > 3600) - cerr << eta/3600 << " hours"; - else if (eta > 60) - cerr << eta/60 << " minutes"; - else cerr << eta << " seconds"; - } - cerr << " \r" << flush; + << " %, ETA " + << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done) + << " \r" << flush; } // This is where we actually run a benchmark! - benchmarks[i].run(); + benchmarks[benchmark_index].run(); + benchmark_index++; + } +} + +void run_benchmarks(vector& benchmarks) +{ + size_t first_benchmark_to_run; + vector deserialized_benchmarks; + bool use_deserialized_benchmarks = false; + if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) { + cerr << "Found serialized session with " + << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size() + << " % already done" << endl; + if (deserialized_benchmarks.size() == benchmarks.size() && + first_benchmark_to_run > 0 && + first_benchmark_to_run < benchmarks.size()) + { + use_deserialized_benchmarks = true; + } } - // Erase progress info - cerr << " " << endl; + if (use_deserialized_benchmarks) { + benchmarks = deserialized_benchmarks; + } else { + // not using deserialized benchmarks, starting from scratch + first_benchmark_to_run = 0; + + // Randomly shuffling benchmarks allows us to get accurate enough progress info, + // as now the cheap/expensive benchmarks are randomly mixed so they average out. + // It also means that if data is corrupted for some time span, the odds are that + // not all repetitions of a given benchmark will be corrupted. + random_shuffle(benchmarks.begin(), benchmarks.end()); + } + + for (int i = 0; i < 4; i++) { + max_clock_speed = max(max_clock_speed, measure_clock_speed()); + } + + double time_start = 0.0; + while (first_benchmark_to_run < benchmarks.size()) { + if (first_benchmark_to_run == 0) { + time_start = timer.getRealTime(); + } + try_run_some_benchmarks(benchmarks, + time_start, + first_benchmark_to_run); + } // Sort timings by increasing benchmark parameters, and decreasing gflops. // The latter is very important. It means that we can ignore all but the first @@ -348,9 +558,120 @@ int main(int argc, char* argv[]) } } - // Output data. - cout << "BEGIN MEASUREMENTS" << endl; - for (auto it = best_benchmarks.begin(); it != best_benchmarks.end(); ++it) { - cout << *it << endl; - } + // keep and return only the best benchmarks + benchmarks = best_benchmarks; +} + +struct measure_all_pot_sizes_action_t : action_t +{ + virtual const char* invokation_name() const { return "all-pot-sizes"; } + virtual void run() const + { + vector benchmarks; + for (int repetition = 0; repetition < measurement_repetitions; repetition++) { + for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { + for (size_t msize = minsize; msize <= maxsize; msize *= 2) { + for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { + for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) { + for (size_t mblock = minsize; mblock <= msize; mblock *= 2) { + for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) { + benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock); + } + } + } + } + } + } + } + + run_benchmarks(benchmarks); + + cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl; + for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { + cout << *it << endl; + } + } +}; + +struct measure_default_sizes_action_t : action_t +{ + virtual const char* invokation_name() const { return "default-sizes"; } + virtual void run() const + { + vector benchmarks; + for (int repetition = 0; repetition < measurement_repetitions; repetition++) { + for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { + for (size_t msize = minsize; msize <= maxsize; msize *= 2) { + for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { + benchmarks.emplace_back(ksize, msize, nsize); + } + } + } + } + + run_benchmarks(benchmarks); + + cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl; + for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { + cout << *it << endl; + } + } +}; + +int main(int argc, char* argv[]) +{ + double time_start = timer.getRealTime(); + cout.precision(4); + cerr.precision(4); + + vector> available_actions; + available_actions.emplace_back(new measure_all_pot_sizes_action_t); + available_actions.emplace_back(new measure_default_sizes_action_t); + + auto action = available_actions.end(); + + if (argc <= 1) { + show_usage_and_exit(argc, argv, available_actions); + } + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + if (!strcmp(argv[1], (*it)->invokation_name())) { + action = it; + break; + } + } + + if (action == available_actions.end()) { + show_usage_and_exit(argc, argv, available_actions); + } + + for (int i = 2; i < argc; i++) { + if (argv[i] == strstr(argv[i], "--min-working-set-size=")) { + const char* equals_sign = strchr(argv[i], '='); + min_working_set_size = strtoul(equals_sign+1, nullptr, 10); + } else { + cerr << "unrecognized option: " << argv[i] << endl << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + + print_cpuinfo(); + + cout << "benchmark parameters:" << endl; + cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl; + cout << "scalar type: " << type_name() << endl; + cout << "packet size: " << internal::packet_traits::size << endl; + cout << "minsize = " << minsize << endl; + cout << "maxsize = " << maxsize << endl; + cout << "measurement_repetitions = " << measurement_repetitions << endl; + cout << "min_accurate_time = " << min_accurate_time << endl; + cout << "min_working_set_size = " << min_working_set_size; + if (min_working_set_size == 0) { + cout << " (try to outsize caches)"; + } + cout << endl << endl; + + (*action)->run(); + + double time_end = timer.getRealTime(); + cerr << "Finished in " << human_duration_t(time_end - time_start) << endl; } diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index f19b4287d..40a71c781 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -1,39 +1,45 @@ -3.0.1 -3.1.1 -3.2.0 +#3.0.1 +#3.1.1 +#3.2.0 3.2.4 -5745:37f59e65eb6c -5891:d8652709345d -5893:24b4dc92c6d3 -5895:997c2ef9fc8b -5904:e1eafd14eaa1 -5908:f8ee3c721251 -5921:ca808bb456b0 -5927:8b1001f9e3ac -5937:5a4ca1ad8c53 -5949:f3488f4e45b2 -5969:e09031dccfd9 -5992:4a429f5e0483 +#5745:37f59e65eb6c +5891:d8652709345d # introduce AVX +#5893:24b4dc92c6d3 # merge +5895:997c2ef9fc8b # introduce FMA +#5904:e1eafd14eaa1 # complex and AVX +5908:f8ee3c721251 # improve packing with ptranspose +#5921:ca808bb456b0 # merge +#5927:8b1001f9e3ac +5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks +#5949:f3488f4e45b2 # merge +#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec +#5992:4a429f5e0483 # merge before-evaluators -6334:f6a45e5b8b7c -6639:c9121c60b5c7 -6655:06f163b5221f -6677:700e023044e7 # FMA has been wrongly disabled -6681:11d31dafb0e3 -6699:5e6e8e10aad1 # merge default to tensors -6726:ff2d2388e7b9 # merge default to tensors -6742:0cbd6195e829 # merge default to tensors -6747:853d2bafeb8f # Generalized the gebp apis +#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products +#6639:c9121c60b5c7 +#6655:06f163b5221f # Properly detect FMA support on ARM +#6677:700e023044e7 # FMA has been wrongly disabled +#6681:11d31dafb0e3 +#6699:5e6e8e10aad1 # merge default to tensors +#6726:ff2d2388e7b9 # merge default to tensors +#6742:0cbd6195e829 # merge default to tensors +#6747:853d2bafeb8f # Generalized the gebp apis 6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation -6781:9cc5a931b2c6 # generalized gemv -6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product -6844:039efd86b75c # merge tensor +#6781:9cc5a931b2c6 # generalized gemv +#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product +#6844:039efd86b75c # merge tensor 6845:7333ed40c6ef # change prefetching in gebp -6856:b5be5e10eb7f # merge index conversion -6893:c3a64aba7c70 # clean blocking size computation -6898:6fb31ebe6492 # rotating kernel for ARM +#6856:b5be5e10eb7f # merge index conversion +#6893:c3a64aba7c70 # clean blocking size computation +#6898:6fb31ebe6492 # rotating kernel for ARM 6899:877facace746 # rotating kernel for ARM only -6904:c250623ae9fa # result_of +#6904:c250623ae9fa # result_of 6921:915f1b1fc158 # fix prefetching change for ARM 6923:9ff25f6dacc6 # prefetching -6933:52572e60b5d3 # blocking size strategy \ No newline at end of file +6933:52572e60b5d3 # blocking size strategy +6937:c8c042f286b2 # avoid redundant pack_rhs +6981:7e5d6f78da59 # dynamic loop swapping +6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache +6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. +7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) + diff --git a/bench/perf_monitoring/gemm/run_gemm.sh b/bench/perf_monitoring/gemm/run_gemm.sh index d3a9fadc9..3fa6a3661 100755 --- a/bench/perf_monitoring/gemm/run_gemm.sh +++ b/bench/perf_monitoring/gemm/run_gemm.sh @@ -6,6 +6,7 @@ # Options: # -up : enforce the recomputation of existing data, and keep best results as a merging strategy +# -s : recompute selected changesets only and keep bests if echo "$*" | grep '\-up' > /dev/null; then @@ -14,14 +15,30 @@ else update=false fi -if [ $update == true ]; then +if echo "$*" | grep '\-s' > /dev/null; then + selected=true +else + selected=false +fi + +global_args="$*" + +if [ $selected == true ]; then + echo "Recompute selected changesets only and keep bests" +elif [ $update == true ]; then echo "(Re-)Compute all changesets and keep bests" else echo "Skip previously computed changesets" fi + + if [ ! -d "eigen_src" ]; then hg clone https://bitbucket.org/eigen/eigen eigen_src +else + cd eigen_src + hg pull -u + cd .. fi if [ ! -z '$CXX' ]; then @@ -61,17 +78,31 @@ function test_current scalar=$2 name=$3 - prev=`grep $rev "$name.backup" | cut -c 14-` + prev="" + if [ -e "$name.backup" ]; then + prev=`grep $rev "$name.backup" | cut -c 14-` + fi res=$prev count_rev=`echo $prev | wc -w` count_ref=`cat "settings.txt" | wc -l` - if [ $update == true ] || [ $count_rev != $count_ref ]; then + if echo "$global_args" | grep "$rev" > /dev/null; then + rev_found=true + else + rev_found=false + fi +# echo $update et $selected et $rev_found because $rev et "$global_args" +# echo $count_rev et $count_ref + if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src gemm.cpp -DSCALAR=$scalar -o $name; then curr=`./$name` - echo merge $prev - echo with $curr + if [ $count_rev == $count_ref ]; then + echo "merge previous $prev" + echo "with new $curr" + else + echo "got $curr" + fi res=`merge "$curr" "$prev"` - echo $res +# echo $res echo "$rev $res" >> $name.out else echo "Compilation failed, skip rev $rev" @@ -86,12 +117,12 @@ make_backup $PREFIX"sgemm" make_backup $PREFIX"dgemm" make_backup $PREFIX"cgemm" -cut -f1 -d"#" < changesets.txt | while read rev +cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev do if [ ! -z '$rev' ]; then echo "Testing rev $rev" cd eigen_src - hg up -C $rev + hg up -C $rev > /dev/null actual_rev=`hg identify | cut -f1 -d' '` cd .. diff --git a/bench/perf_monitoring/gemm/settings.txt b/bench/perf_monitoring/gemm/settings.txt index 6ef690708..5c43e1c7d 100644 --- a/bench/perf_monitoring/gemm/settings.txt +++ b/bench/perf_monitoring/gemm/settings.txt @@ -1,5 +1,6 @@ 8 8 8 9 9 9 +24 24 24 239 239 239 240 240 240 2400 24 24 @@ -8,4 +9,7 @@ 24 2400 2400 2400 24 2400 2400 2400 24 +2400 2400 64 +4800 23 160 +23 4800 160 2400 2400 2400 diff --git a/blas/xerbla.cpp b/blas/xerbla.cpp index 8775b88cd..c373e8699 100644 --- a/blas/xerbla.cpp +++ b/blas/xerbla.cpp @@ -1,7 +1,7 @@ #include -#if (defined __GNUC__) && (!defined __MINGW32__) +#if (defined __GNUC__) && (!defined __MINGW32__) && (!defined __CYGWIN__) #define EIGEN_WEAK_LINKING __attribute__ ((weak)) #else #define EIGEN_WEAK_LINKING diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index b4ab95dbc..f8aec777d 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -502,6 +502,10 @@ macro(ei_set_build_string) set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit) endif() + if(EIGEN_TEST_CXX11) + set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11) + endif() + if(EIGEN_BUILD_STRING_SUFFIX) set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX}) endif() diff --git a/doc/CustomizingEigen.dox b/doc/CustomizingEigen.dox index 0850863aa..cb25f4ec9 100644 --- a/doc/CustomizingEigen.dox +++ b/doc/CustomizingEigen.dox @@ -157,6 +157,67 @@ inline adouble abs2(const adouble& x) { return x*x; } #endif // ADOLCSUPPORT_H \endcode +This other example adds support for the \c mpq_class type from GMP. It shows in particular how to change the way Eigen picks the best pivot during LU factorization. It selects the coefficient with the highest score, where the score is by default the absolute value of a number, but we can define a different score, for instance to prefer pivots with a more compact representation (this is an example, not a recommendation). Note that the scores should always be non-negative and only zero is allowed to have a score of zero. Also, this can interact badly with thresholds for inexact scalar types. + +\code +#include +#include +#include + +namespace Eigen { + template struct NumTraits; + template<> struct NumTraits + { + typedef mpq_class Real; + typedef mpq_class NonInteger; + typedef mpq_class Nested; + + static inline Real epsilon() { return 0; } + static inline Real dummy_precision() { return 0; } + + enum { + IsInteger = 0, + IsSigned = 1, + IsComplex = 0, + RequireInitialization = 1, + ReadCost = 6, + AddCost = 150, + MulCost = 100 + }; + }; + + namespace internal { + template<> + struct significant_decimals_impl + { + // Infinite precision when printing + static inline int run() { return 0; } + }; + + template<> struct scalar_score_coeff_op { + struct result_type : boost::totally_ordered1 { + std::size_t len; + result_type(int i = 0) : len(i) {} // Eigen uses Score(0) and Score() + result_type(mpq_class const& q) : + len(mpz_size(q.get_num_mpz_t())+ + mpz_size(q.get_den_mpz_t())-1) {} + friend bool operator<(result_type x, result_type y) { + // 0 is the worst possible pivot + if (x.len == 0) return y.len > 0; + if (y.len == 0) return false; + // Prefer a pivot with a small representation + return x.len > y.len; + } + friend bool operator==(result_type x, result_type y) { + // Only used to test if the score is 0 + return x.len == y.len; + } + }; + result_type operator()(mpq_class const& x) const { return x; } + }; + } +} +\endcode \sa \ref TopicPreprocessorDirectives diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox index 147b55376..13741280a 100644 --- a/doc/SparseLinearSystems.dox +++ b/doc/SparseLinearSystems.dox @@ -21,6 +21,9 @@ They are summarized in the following table: ConjugateGradient\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkClassic iterative CGSPDPreconditionning built-in, MPL2 Recommended for large symmetric problems (e.g., 3D Poisson eq.) +LSCG\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkCG for rectangular least-square problemRectangularPreconditionning + built-in, MPL2 + Solve for min |A'Ax-b|^2 without forming A'A BiCGSTAB\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkIterative stabilized bi-conjugate gradientSquarePreconditionning built-in, MPL2 To speedup the convergence, try it with the \ref IncompleteLUT preconditioner. diff --git a/failtest/CMakeLists.txt b/failtest/CMakeLists.txt index c8795a344..d3e82ecd9 100644 --- a/failtest/CMakeLists.txt +++ b/failtest/CMakeLists.txt @@ -47,6 +47,18 @@ ei_add_failtest("sparse_ref_3") ei_add_failtest("sparse_ref_4") ei_add_failtest("sparse_ref_5") +ei_add_failtest("partialpivlu_int") +ei_add_failtest("fullpivlu_int") +ei_add_failtest("llt_int") +ei_add_failtest("ldlt_int") +ei_add_failtest("qr_int") +ei_add_failtest("colpivqr_int") +ei_add_failtest("fullpivqr_int") +ei_add_failtest("jacobisvd_int") +ei_add_failtest("bdcsvd_int") +ei_add_failtest("eigensolver_int") +ei_add_failtest("eigensolver_cplx") + if (EIGEN_FAILTEST_FAILURE_COUNT) message(FATAL_ERROR "${EIGEN_FAILTEST_FAILURE_COUNT} out of ${EIGEN_FAILTEST_COUNT} failtests FAILED. " diff --git a/failtest/bdcsvd_int.cpp b/failtest/bdcsvd_int.cpp new file mode 100644 index 000000000..670752cf5 --- /dev/null +++ b/failtest/bdcsvd_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/SVD" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + BDCSVD > qr(Matrix::Random(10,10)); +} diff --git a/failtest/colpivqr_int.cpp b/failtest/colpivqr_int.cpp new file mode 100644 index 000000000..db11910d4 --- /dev/null +++ b/failtest/colpivqr_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/QR" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + ColPivHouseholderQR > qr(Matrix::Random(10,10)); +} diff --git a/failtest/eigensolver_cplx.cpp b/failtest/eigensolver_cplx.cpp new file mode 100644 index 000000000..c2e21e189 --- /dev/null +++ b/failtest/eigensolver_cplx.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Eigenvalues" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR std::complex +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + EigenSolver > eig(Matrix::Random(10,10)); +} diff --git a/failtest/eigensolver_int.cpp b/failtest/eigensolver_int.cpp new file mode 100644 index 000000000..eda8dc20b --- /dev/null +++ b/failtest/eigensolver_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Eigenvalues" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + EigenSolver > eig(Matrix::Random(10,10)); +} diff --git a/failtest/fullpivlu_int.cpp b/failtest/fullpivlu_int.cpp new file mode 100644 index 000000000..e9d2c6eb3 --- /dev/null +++ b/failtest/fullpivlu_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/LU" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + FullPivLU > lu(Matrix::Random(10,10)); +} diff --git a/failtest/fullpivqr_int.cpp b/failtest/fullpivqr_int.cpp new file mode 100644 index 000000000..d182a7b6b --- /dev/null +++ b/failtest/fullpivqr_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/QR" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + FullPivHouseholderQR > qr(Matrix::Random(10,10)); +} diff --git a/failtest/jacobisvd_int.cpp b/failtest/jacobisvd_int.cpp new file mode 100644 index 000000000..12790aef1 --- /dev/null +++ b/failtest/jacobisvd_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/SVD" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + JacobiSVD > qr(Matrix::Random(10,10)); +} diff --git a/failtest/ldlt_int.cpp b/failtest/ldlt_int.cpp new file mode 100644 index 000000000..243e45746 --- /dev/null +++ b/failtest/ldlt_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Cholesky" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + LDLT > ldlt(Matrix::Random(10,10)); +} diff --git a/failtest/llt_int.cpp b/failtest/llt_int.cpp new file mode 100644 index 000000000..cb020650d --- /dev/null +++ b/failtest/llt_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Cholesky" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + LLT > llt(Matrix::Random(10,10)); +} diff --git a/failtest/partialpivlu_int.cpp b/failtest/partialpivlu_int.cpp new file mode 100644 index 000000000..98ef282ea --- /dev/null +++ b/failtest/partialpivlu_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/LU" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + PartialPivLU > lu(Matrix::Random(10,10)); +} diff --git a/failtest/qr_int.cpp b/failtest/qr_int.cpp new file mode 100644 index 000000000..ce200e818 --- /dev/null +++ b/failtest/qr_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/QR" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + HouseholderQR > qr(Matrix::Random(10,10)); +} diff --git a/scripts/buildtests.in b/scripts/buildtests.in index 7026373cf..d2fd10276 100755 --- a/scripts/buildtests.in +++ b/scripts/buildtests.in @@ -14,9 +14,9 @@ targets_to_make=`echo "$TESTSLIST" | egrep "$1" | xargs echo` if [ -n "${EIGEN_MAKE_ARGS:+x}" ] then - make $targets_to_make ${EIGEN_MAKE_ARGS} + @CMAKE_MAKE_PROGRAM@ $targets_to_make ${EIGEN_MAKE_ARGS} else - make $targets_to_make + @CMAKE_MAKE_PROGRAM@ $targets_to_make @EIGEN_TEST_BUILD_FLAGS@ fi exit $? diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 168749634..393c35b57 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -139,6 +139,7 @@ endif(TEST_LIB) set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official") add_custom_target(BuildOfficial) +ei_add_test(rand) ei_add_test(meta) ei_add_test(sizeof) ei_add_test(dynalloc) @@ -226,6 +227,7 @@ ei_add_test(stdvector_overload) ei_add_test(stdlist) ei_add_test(stddeque) ei_add_test(sparse_basic) +ei_add_test(sparse_block) ei_add_test(sparse_vector) ei_add_test(sparse_product) ei_add_test(sparse_ref) @@ -234,6 +236,7 @@ ei_add_test(sparse_permutations) ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) ei_add_test(bicgstab) +ei_add_test(lscg) ei_add_test(sparselu) ei_add_test(sparseqr) ei_add_test(umeyama) diff --git a/test/bicgstab.cpp b/test/bicgstab.cpp index f327e2fac..7a9a11330 100644 --- a/test/bicgstab.cpp +++ b/test/bicgstab.cpp @@ -10,11 +10,11 @@ #include "sparse_solver.h" #include -template void test_bicgstab_T() +template void test_bicgstab_T() { - BiCGSTAB, DiagonalPreconditioner > bicgstab_colmajor_diag; - BiCGSTAB, IdentityPreconditioner > bicgstab_colmajor_I; - BiCGSTAB, IncompleteLUT > bicgstab_colmajor_ilut; + BiCGSTAB, DiagonalPreconditioner > bicgstab_colmajor_diag; + BiCGSTAB, IdentityPreconditioner > bicgstab_colmajor_I; + BiCGSTAB, IncompleteLUT > bicgstab_colmajor_ilut; //BiCGSTAB, SSORPreconditioner > bicgstab_colmajor_ssor; CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_diag) ); @@ -25,6 +25,7 @@ template void test_bicgstab_T() void test_bicgstab() { - CALL_SUBTEST_1(test_bicgstab_T()); - CALL_SUBTEST_2(test_bicgstab_T >()); + CALL_SUBTEST_1((test_bicgstab_T()) ); + CALL_SUBTEST_2((test_bicgstab_T, int>())); + CALL_SUBTEST_3((test_bicgstab_T())); } diff --git a/test/conjugate_gradient.cpp b/test/conjugate_gradient.cpp index 019cc4d64..9622fd86d 100644 --- a/test/conjugate_gradient.cpp +++ b/test/conjugate_gradient.cpp @@ -10,13 +10,14 @@ #include "sparse_solver.h" #include -template void test_conjugate_gradient_T() +template void test_conjugate_gradient_T() { - ConjugateGradient, Lower > cg_colmajor_lower_diag; - ConjugateGradient, Upper > cg_colmajor_upper_diag; - ConjugateGradient, Lower|Upper> cg_colmajor_loup_diag; - ConjugateGradient, Lower, IdentityPreconditioner> cg_colmajor_lower_I; - ConjugateGradient, Upper, IdentityPreconditioner> cg_colmajor_upper_I; + typedef SparseMatrix SparseMatrixType; + ConjugateGradient cg_colmajor_lower_diag; + ConjugateGradient cg_colmajor_upper_diag; + ConjugateGradient cg_colmajor_loup_diag; + ConjugateGradient cg_colmajor_lower_I; + ConjugateGradient cg_colmajor_upper_I; CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_lower_diag) ); CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_upper_diag) ); @@ -27,6 +28,7 @@ template void test_conjugate_gradient_T() void test_conjugate_gradient() { - CALL_SUBTEST_1(test_conjugate_gradient_T()); - CALL_SUBTEST_2(test_conjugate_gradient_T >()); + CALL_SUBTEST_1(( test_conjugate_gradient_T() )); + CALL_SUBTEST_2(( test_conjugate_gradient_T, int>() )); + CALL_SUBTEST_3(( test_conjugate_gradient_T() )); } diff --git a/test/lscg.cpp b/test/lscg.cpp new file mode 100644 index 000000000..daa62a954 --- /dev/null +++ b/test/lscg.cpp @@ -0,0 +1,29 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse_solver.h" +#include + +template void test_lscg_T() +{ + LeastSquaresConjugateGradient > lscg_colmajor_diag; + LeastSquaresConjugateGradient, IdentityPreconditioner> lscg_colmajor_I; + + CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_diag) ); + CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_I) ); + + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_diag) ); + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_I) ); +} + +void test_lscg() +{ + CALL_SUBTEST_1(test_lscg_T()); + CALL_SUBTEST_2(test_lscg_T >()); +} diff --git a/test/main.h b/test/main.h index 1f937690c..ecf0c6924 100644 --- a/test/main.h +++ b/test/main.h @@ -42,13 +42,19 @@ #include #include #include +#if __cplusplus >= 201103L +#include +#ifdef EIGEN_USE_THREADS +#include +#endif +#endif // To test that all calls from Eigen code to std::min() and std::max() are // protected by parenthesis against macro expansion, the min()/max() macros // are defined here and any not-parenthesized min/max call will cause a // compiler error. -//#define min(A,B) please_protect_your_min_with_parentheses -//#define max(A,B) please_protect_your_max_with_parentheses +#define min(A,B) please_protect_your_min_with_parentheses +#define max(A,B) please_protect_your_max_with_parentheses #define FORBIDDEN_IDENTIFIER (this_identifier_is_forbidden_to_avoid_clashes) this_identifier_is_forbidden_to_avoid_clashes // B0 is defined in POSIX header termios.h diff --git a/test/product_extra.cpp b/test/product_extra.cpp index 744a1ef7f..1b4c6c33c 100644 --- a/test/product_extra.cpp +++ b/test/product_extra.cpp @@ -109,8 +109,33 @@ void mat_mat_scalar_scalar_product() double det = 6.0, wt = 0.5; VERIFY_IS_APPROX(dNdxy.transpose()*dNdxy*det*wt, det*wt*dNdxy.transpose()*dNdxy); } + +template +void zero_sized_objects(const MatrixType& m) +{ + Index rows = m.rows(); + Index cols = m.cols(); -void zero_sized_objects() + { + MatrixType res, a(rows,0), b(0,cols); + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(rows,cols) ); + VERIFY_IS_APPROX( (res=a*a.transpose()), MatrixType::Zero(rows,rows) ); + VERIFY_IS_APPROX( (res=b.transpose()*b), MatrixType::Zero(cols,cols) ); + VERIFY_IS_APPROX( (res=b.transpose()*a.transpose()), MatrixType::Zero(cols,rows) ); + } + + { + MatrixType res, a(rows,cols), b(cols,0); + res = a*b; + VERIFY(res.rows()==rows && res.cols()==0); + b.resize(0,rows); + res = b*a; + VERIFY(res.rows()==0 && res.cols()==cols); + } +} + + +void bug_127() { // Bug 127 // @@ -171,7 +196,8 @@ void test_product_extra() CALL_SUBTEST_2( mat_mat_scalar_scalar_product() ); CALL_SUBTEST_3( product_extra(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); + CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } - CALL_SUBTEST_5( zero_sized_objects() ); + CALL_SUBTEST_5( bug_127() ); CALL_SUBTEST_6( unaligned_objects() ); } diff --git a/test/rand.cpp b/test/rand.cpp new file mode 100644 index 000000000..7c8068a3b --- /dev/null +++ b/test/rand.cpp @@ -0,0 +1,88 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template Scalar check_in_range(Scalar x, Scalar y) +{ + Scalar r = internal::random(x,y); + VERIFY(r>=x); + if(y>=x) + { + VERIFY(r<=y); + } + return r; +} + +template void check_all_in_range(Scalar x, Scalar y) +{ + Array mask(y-x+1); + mask.fill(0); + long n = (y-x+1)*32; + for(long k=0; k0).all() ); +} + +void test_rand() +{ + long long_ref = NumTraits::highest()/10; + char char_offset = (std::min)(g_repeat,64); + char short_offset = (std::min)(g_repeat,16000); + + for(int i = 0; i < g_repeat*10; i++) { + CALL_SUBTEST(check_in_range(10,11)); + CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); + CALL_SUBTEST(check_in_range(-1,1)); + CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); + + CALL_SUBTEST(check_in_range(10,11)); + CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); + CALL_SUBTEST(check_in_range(-1,1)); + CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); + + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(-673456,673456)); + CALL_SUBTEST(check_in_range(-24345,24345)); + CALL_SUBTEST(check_in_range(-long_ref,long_ref)); + } + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+char_offset)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-char_offset,-11)); + CALL_SUBTEST(check_all_in_range(-126,-126+char_offset)); + CALL_SUBTEST(check_all_in_range(126-char_offset,126)); + CALL_SUBTEST(check_all_in_range(-126,126)); + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+short_offset)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-short_offset,-11)); + CALL_SUBTEST(check_all_in_range(-24345,-24345+short_offset)); + CALL_SUBTEST(check_all_in_range(24345,24345+short_offset)); + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); + CALL_SUBTEST(check_all_in_range(-673456,-673456+g_repeat)); + CALL_SUBTEST(check_all_in_range(673456,673456+g_repeat)); + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); + CALL_SUBTEST(check_all_in_range(-long_ref,-long_ref+g_repeat)); + CALL_SUBTEST(check_all_in_range( long_ref, long_ref+g_repeat)); +} diff --git a/test/ref.cpp b/test/ref.cpp index b9470213c..fbe2c450f 100644 --- a/test/ref.cpp +++ b/test/ref.cpp @@ -228,6 +228,28 @@ void call_ref() VERIFY_EVALUATION_COUNT( call_ref_7(c,c), 0); } +typedef Matrix RowMatrixXd; +int test_ref_overload_fun1(Ref ) { return 1; } +int test_ref_overload_fun1(Ref ) { return 2; } +int test_ref_overload_fun1(Ref ) { return 3; } + +int test_ref_overload_fun2(Ref ) { return 4; } +int test_ref_overload_fun2(Ref ) { return 5; } + +// See also bug 969 +void test_ref_overloads() +{ + MatrixXd Ad, Bd; + RowMatrixXd rAd, rBd; + VERIFY( test_ref_overload_fun1(Ad)==1 ); + VERIFY( test_ref_overload_fun1(rAd)==2 ); + + MatrixXf Af, Bf; + VERIFY( test_ref_overload_fun2(Ad)==4 ); + VERIFY( test_ref_overload_fun2(Ad+Bd)==4 ); + VERIFY( test_ref_overload_fun2(Af+Bf)==5 ); +} + void test_ref() { for(int i = 0; i < g_repeat; i++) { @@ -248,4 +270,6 @@ void test_ref() CALL_SUBTEST_5( ref_matrix(MatrixXi(internal::random(1,10),internal::random(1,10))) ); CALL_SUBTEST_6( call_ref() ); } + + CALL_SUBTEST_7( test_ref_overloads() ); } diff --git a/test/simplicial_cholesky.cpp b/test/simplicial_cholesky.cpp index 786468421..b7cc2d351 100644 --- a/test/simplicial_cholesky.cpp +++ b/test/simplicial_cholesky.cpp @@ -9,16 +9,17 @@ #include "sparse_solver.h" -template void test_simplicial_cholesky_T() +template void test_simplicial_cholesky_T() { - SimplicialCholesky, Lower> chol_colmajor_lower_amd; - SimplicialCholesky, Upper> chol_colmajor_upper_amd; - SimplicialLLT, Lower> llt_colmajor_lower_amd; - SimplicialLLT, Upper> llt_colmajor_upper_amd; - SimplicialLDLT, Lower> ldlt_colmajor_lower_amd; - SimplicialLDLT, Upper> ldlt_colmajor_upper_amd; - SimplicialLDLT, Lower, NaturalOrdering > ldlt_colmajor_lower_nat; - SimplicialLDLT, Upper, NaturalOrdering > ldlt_colmajor_upper_nat; + typedef SparseMatrix SparseMatrixType; + SimplicialCholesky chol_colmajor_lower_amd; + SimplicialCholesky chol_colmajor_upper_amd; + SimplicialLLT< SparseMatrixType, Lower> llt_colmajor_lower_amd; + SimplicialLLT< SparseMatrixType, Upper> llt_colmajor_upper_amd; + SimplicialLDLT< SparseMatrixType, Lower> ldlt_colmajor_lower_amd; + SimplicialLDLT< SparseMatrixType, Upper> ldlt_colmajor_upper_amd; + SimplicialLDLT< SparseMatrixType, Lower, NaturalOrdering > ldlt_colmajor_lower_nat; + SimplicialLDLT< SparseMatrixType, Upper, NaturalOrdering > ldlt_colmajor_upper_nat; check_sparse_spd_solving(chol_colmajor_lower_amd); check_sparse_spd_solving(chol_colmajor_upper_amd); @@ -40,6 +41,7 @@ template void test_simplicial_cholesky_T() void test_simplicial_cholesky() { - CALL_SUBTEST_1(test_simplicial_cholesky_T()); - CALL_SUBTEST_2(test_simplicial_cholesky_T >()); + CALL_SUBTEST_1(( test_simplicial_cholesky_T() )); + CALL_SUBTEST_2(( test_simplicial_cholesky_T, int>() )); + CALL_SUBTEST_3(( test_simplicial_cholesky_T() )); } diff --git a/test/sparse.h b/test/sparse.h index 81ab9e873..3c3a0c9be 100644 --- a/test/sparse.h +++ b/test/sparse.h @@ -53,15 +53,15 @@ enum { * \param zeroCoords and nonzeroCoords allows to get the coordinate lists of the non zero, * and zero coefficients respectively. */ -template void +template void initSparse(double density, Matrix& refMat, - SparseMatrix& sparseMat, + SparseMatrix& sparseMat, int flags = 0, - std::vector >* zeroCoords = 0, - std::vector >* nonzeroCoords = 0) + std::vector >* zeroCoords = 0, + std::vector >* nonzeroCoords = 0) { - enum { IsRowMajor = SparseMatrix::IsRowMajor }; + enum { IsRowMajor = SparseMatrix::IsRowMajor }; sparseMat.setZero(); //sparseMat.reserve(int(refMat.rows()*refMat.cols()*density)); sparseMat.reserve(VectorXi::Constant(IsRowMajor ? refMat.rows() : refMat.cols(), int((1.5*density)*(IsRowMajor?refMat.cols():refMat.rows())))); @@ -93,11 +93,11 @@ initSparse(double density, //sparseMat.insertBackByOuterInner(j,i) = v; sparseMat.insertByOuterInner(j,i) = v; if (nonzeroCoords) - nonzeroCoords->push_back(Matrix (ai,aj)); + nonzeroCoords->push_back(Matrix (ai,aj)); } else if (zeroCoords) { - zeroCoords->push_back(Matrix (ai,aj)); + zeroCoords->push_back(Matrix (ai,aj)); } refMat(ai,aj) = v; } diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 8021f4db6..75f29a2b4 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -9,6 +9,9 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +static long g_realloc_count = 0; +#define EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN g_realloc_count++; + #include "sparse.h" template void sparse_basic(const SparseMatrixType& ref) @@ -55,48 +58,6 @@ template void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m, refMat); - // test InnerIterators and Block expressions - for (Index t=0; t<10; ++t) - { - Index j = internal::random(0,cols-1); - Index i = internal::random(0,rows-1); - Index w = internal::random(1,cols-j-1); - Index h = internal::random(1,rows-i-1); - - VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); - for(Index c=0; c void sparse_basic(const SparseMatrixType& re DenseMatrix m1(rows,cols); m1.setZero(); SparseMatrixType m2(rows,cols); - if(internal::random()%2) - m2.reserve(VectorXi::Constant(m2.outerSize(), 2)); + bool call_reserve = internal::random()%2; + Index nnz = internal::random(1,int(rows)/2); + if(call_reserve) + { + if(internal::random()%2) + m2.reserve(VectorXi::Constant(m2.outerSize(), int(nnz))); + else + m2.reserve(m2.outerSize() * nnz); + } + g_realloc_count = 0; for (Index j=0; j(0,rows-1); if (m1.coeff(i,j)==Scalar(0)) m2.insert(i,j) = m1(i,j) = internal::random(); } } + + if(call_reserve && !SparseMatrixType::IsRowMajor) + { + VERIFY(g_realloc_count==0); + } + m2.finalize(); VERIFY_IS_APPROX(m2,m1); } @@ -167,82 +142,6 @@ template void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m2,m1); } - // test innerVector() - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); - SparseMatrixType m2(rows, cols); - initSparse(density, refMat2, m2); - Index j0 = internal::random(0,outer-1); - Index j1 = internal::random(0,outer-1); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0)); - else - VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0)); - - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1)); - else - VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1)); - - SparseMatrixType m3(rows,cols); - m3.reserve(VectorXi::Constant(outer,int(inner/2))); - for(Index j=0; j0) - VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); - } - m3.makeCompressed(); - for(Index j=0; j<(std::min)(outer, inner); ++j) - { - VERIFY(j==numext::real(m3.innerVector(j).nonZeros())); - if(j>0) - VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); - } - - VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros()); - -// m2.innerVector(j0) = 2*m2.innerVector(j1); -// refMat2.col(j0) = 2*refMat2.col(j1); -// VERIFY_IS_APPROX(m2, refMat2); - } - - // test innerVectors() - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); - SparseMatrixType m2(rows, cols); - initSparse(density, refMat2, m2); - if(internal::random(0,1)>0.5) m2.makeCompressed(); - Index j0 = internal::random(0,outer-2); - Index j1 = internal::random(0,outer-2); - Index n0 = internal::random(1,outer-(std::max)(j0,j1)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), - refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0)); - else - VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), - refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); - - VERIFY_IS_APPROX(m2, refMat2); - - VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros()); - - m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0); - if(SparseMatrixType::IsRowMajor) - refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval(); - else - refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval(); - - VERIFY_IS_APPROX(m2, refMat2); - } - // test basic computations { DenseMatrix refM1 = DenseMatrix::Zero(rows, cols); @@ -313,40 +212,6 @@ template void sparse_basic(const SparseMatrixType& re VERIFY(m2.isApprox(m3)); } - - - // test generic blocks - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); - SparseMatrixType m2(rows, cols); - initSparse(density, refMat2, m2); - Index j0 = internal::random(0,outer-2); - Index j1 = internal::random(0,outer-2); - Index n0 = internal::random(1,outer-(std::max)(j0,j1)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0)); - - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols), - refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0), - refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); - - Index i = internal::random(0,m2.outerSize()-1); - if(SparseMatrixType::IsRowMajor) { - m2.innerVector(i) = m2.innerVector(i) * s1; - refMat2.row(i) = refMat2.row(i) * s1; - VERIFY_IS_APPROX(m2,refMat2); - } else { - m2.innerVector(i) = m2.innerVector(i) * s1; - refMat2.col(i) = refMat2.col(i) * s1; - VERIFY_IS_APPROX(m2,refMat2); - } - } - // test prune { SparseMatrixType m2(rows, cols); @@ -575,7 +440,7 @@ void big_sparse_triplet(Index rows, Index cols, double density) { void test_sparse_basic() { for(int i = 0; i < g_repeat; i++) { - int r = Eigen::internal::random(1,100), c = Eigen::internal::random(1,100); + int r = Eigen::internal::random(1,200), c = Eigen::internal::random(1,200); if(Eigen::internal::random(0,4) == 0) { r = c; // check square matrices in 25% of tries } @@ -585,11 +450,17 @@ void test_sparse_basic() CALL_SUBTEST_2(( sparse_basic(SparseMatrix, ColMajor>(r, c)) )); CALL_SUBTEST_2(( sparse_basic(SparseMatrix, RowMajor>(r, c)) )); CALL_SUBTEST_1(( sparse_basic(SparseMatrix(r, c)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(r, c)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(r, c)) )); + CALL_SUBTEST_5(( sparse_basic(SparseMatrix(r, c)) )); + CALL_SUBTEST_5(( sparse_basic(SparseMatrix(r, c)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(short(r), short(c))) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(short(r), short(c))) )); + r = Eigen::internal::random(1,100); + c = Eigen::internal::random(1,100); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + + CALL_SUBTEST_6(( sparse_basic(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_6(( sparse_basic(SparseMatrix(short(r), short(c))) )); } // Regression test for bug 900: (manually insert higher values here, if you have enough RAM): diff --git a/test/sparse_block.cpp b/test/sparse_block.cpp new file mode 100644 index 000000000..8a6e0687c --- /dev/null +++ b/test/sparse_block.cpp @@ -0,0 +1,254 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse.h" + +template void sparse_block(const SparseMatrixType& ref) +{ + const Index rows = ref.rows(); + const Index cols = ref.cols(); + const Index inner = ref.innerSize(); + const Index outer = ref.outerSize(); + + typedef typename SparseMatrixType::Scalar Scalar; + + double density = (std::max)(8./(rows*cols), 0.01); + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + typedef Matrix RowDenseVector; + + Scalar s1 = internal::random(); + { + SparseMatrixType m(rows, cols); + DenseMatrix refMat = DenseMatrix::Zero(rows, cols); + initSparse(density, refMat, m); + + VERIFY_IS_APPROX(m, refMat); + + // test InnerIterators and Block expressions + for (int t=0; t<10; ++t) + { + Index j = internal::random(0,cols-2); + Index i = internal::random(0,rows-2); + Index w = internal::random(1,cols-j); + Index h = internal::random(1,rows-i); + + VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); + for(Index c=0; c(density, refMat2, m2); + Index j0 = internal::random(0,outer-1); + Index j1 = internal::random(0,outer-1); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0)); + else + VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0)); + + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1)); + else + VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1)); + + SparseMatrixType m3(rows,cols); + m3.reserve(VectorXi::Constant(outer,int(inner/2))); + for(Index j=0; j0) + VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); + } + m3.makeCompressed(); + for(Index j=0; j<(std::min)(outer, inner); ++j) + { + VERIFY(j==numext::real(m3.innerVector(j).nonZeros())); + if(j>0) + VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); + } + + VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros()); + +// m2.innerVector(j0) = 2*m2.innerVector(j1); +// refMat2.col(j0) = 2*refMat2.col(j1); +// VERIFY_IS_APPROX(m2, refMat2); + } + + // test innerVectors() + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + if(internal::random(0,1)>0.5) m2.makeCompressed(); + Index j0 = internal::random(0,outer-2); + Index j1 = internal::random(0,outer-2); + Index n0 = internal::random(1,outer-(std::max)(j0,j1)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), + refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0)); + else + VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), + refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); + + VERIFY_IS_APPROX(m2, refMat2); + + VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros()); + + m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0); + if(SparseMatrixType::IsRowMajor) + refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval(); + else + refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval(); + + VERIFY_IS_APPROX(m2, refMat2); + } + + // test generic blocks + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + Index j0 = internal::random(0,outer-2); + Index j1 = internal::random(0,outer-2); + Index n0 = internal::random(1,outer-(std::max)(j0,j1)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0)); + + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols), + refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0), + refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); + + Index i = internal::random(0,m2.outerSize()-1); + if(SparseMatrixType::IsRowMajor) { + m2.innerVector(i) = m2.innerVector(i) * s1; + refMat2.row(i) = refMat2.row(i) * s1; + VERIFY_IS_APPROX(m2,refMat2); + } else { + m2.innerVector(i) = m2.innerVector(i) * s1; + refMat2.col(i) = refMat2.col(i) * s1; + VERIFY_IS_APPROX(m2,refMat2); + } + + Index r0 = internal::random(0,rows-2); + Index c0 = internal::random(0,cols-2); + Index r1 = internal::random(1,rows-r0); + Index c1 = internal::random(1,cols-c0); + + VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0)); + VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0)); + + VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0)); + VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0)); + + VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1)); + VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1)); + } +} + +void test_sparse_block() +{ + for(int i = 0; i < g_repeat; i++) { + int r = Eigen::internal::random(1,200), c = Eigen::internal::random(1,200); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + EIGEN_UNUSED_VARIABLE(r+c); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(1, 1)) )); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(8, 8)) )); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(r, c)) )); + CALL_SUBTEST_2(( sparse_block(SparseMatrix, ColMajor>(r, c)) )); + CALL_SUBTEST_2(( sparse_block(SparseMatrix, RowMajor>(r, c)) )); + + CALL_SUBTEST_3(( sparse_block(SparseMatrix(r, c)) )); + CALL_SUBTEST_3(( sparse_block(SparseMatrix(r, c)) )); + + r = Eigen::internal::random(1,100); + c = Eigen::internal::random(1,100); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + + CALL_SUBTEST_4(( sparse_block(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_4(( sparse_block(SparseMatrix(short(r), short(c))) )); + } +} diff --git a/test/sparse_solver.h b/test/sparse_solver.h index f0a4691e3..a078851c3 100644 --- a/test/sparse_solver.h +++ b/test/sparse_solver.h @@ -17,9 +17,9 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, typedef typename Mat::Scalar Scalar; typedef typename Mat::StorageIndex StorageIndex; - DenseRhs refX = dA.lu().solve(db); + DenseRhs refX = dA.householderQr().solve(db); { - Rhs x(b.rows(), b.cols()); + Rhs x(A.cols(), b.cols()); Rhs oldb = b; solver.compute(A); @@ -94,7 +94,7 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, // test dense Block as the result and rhs: { - DenseRhs x(db.rows(), db.cols()); + DenseRhs x(refX.rows(), refX.cols()); DenseRhs oldb(db); x.setZero(); x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols())); @@ -119,7 +119,7 @@ void check_sparse_solving_real_cases(Solver& solver, const typename Solver::Matr typedef typename Mat::Scalar Scalar; typedef typename Mat::RealScalar RealScalar; - Rhs x(b.rows(), b.cols()); + Rhs x(A.cols(), b.cols()); solver.compute(A); if (solver.info() != Success) @@ -230,7 +230,7 @@ template void check_sparse_spd_solving(Solver& solver) { typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; - typedef SparseMatrix SpMat; + typedef SparseMatrix SpMat; typedef Matrix DenseMatrix; typedef Matrix DenseVector; @@ -304,12 +304,12 @@ template void check_sparse_spd_determinant(Solver& solver) } template -int generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag) +Index generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag) { typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; - int size = internal::random(1,maxSize); + Index size = internal::random(1,maxSize); double density = (std::max)(8./(size*size), 0.01); A.resize(size,size); @@ -324,7 +324,7 @@ template void check_sparse_square_solving(Solver& solver) { typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; - typedef SparseMatrix SpMat; + typedef SparseMatrix SpMat; typedef Matrix DenseMatrix; typedef Matrix DenseVector; @@ -333,7 +333,7 @@ template void check_sparse_square_solving(Solver& solver) Mat A; DenseMatrix dA; for (int i = 0; i < g_repeat; i++) { - int size = generate_sparse_square_problem(solver, A, dA); + Index size = generate_sparse_square_problem(solver, A, dA); A.makeCompressed(); DenseVector b = DenseVector::Random(size); @@ -410,3 +410,53 @@ template void check_sparse_square_abs_determinant(Solver& solve } } +template +void generate_sparse_leastsquare_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + + int rows = internal::random(1,maxSize); + int cols = internal::random(1,rows); + double density = (std::max)(8./(rows*cols), 0.01); + + A.resize(rows,cols); + dA.resize(rows,cols); + + initSparse(density, dA, A, options); +} + +template void check_sparse_leastsquare_solving(Solver& solver) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef SparseMatrix SpMat; + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + + int rhsCols = internal::random(1,16); + + Mat A; + DenseMatrix dA; + for (int i = 0; i < g_repeat; i++) { + generate_sparse_leastsquare_problem(solver, A, dA); + + A.makeCompressed(); + DenseVector b = DenseVector::Random(A.rows()); + DenseMatrix dB(A.rows(),rhsCols); + SpMat B(A.rows(),rhsCols); + double density = (std::max)(8./(A.rows()*rhsCols), 0.1); + initSparse(density, dB, B, ForceNonZeroDiag); + B.makeCompressed(); + check_sparse_solving(solver, A, b, dA, b); + check_sparse_solving(solver, A, dB, dA, dB); + check_sparse_solving(solver, A, B, dA, dB); + + // check only once + if(i==0) + { + b = DenseVector::Zero(A.rows()); + check_sparse_solving(solver, A, b, dA, b); + } + } +} diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index d8815263a..6f7b72167 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -81,7 +81,7 @@ void construct_at_boundary(int boundary) void unalignedassert() { - #if EIGEN_ALIGN_STATICALLY +#if EIGEN_ALIGN_STATICALLY construct_at_boundary(4); construct_at_boundary(4); construct_at_boundary(16); @@ -100,7 +100,7 @@ void unalignedassert() construct_at_boundary(4); construct_at_boundary(EIGEN_ALIGN_BYTES); construct_at_boundary(16); - #endif +#endif check_unalignedassert_good(); check_unalignedassert_good(); @@ -112,11 +112,12 @@ void unalignedassert() check_unalignedassert_good >(); #if EIGEN_ALIGN_STATICALLY - if(EIGEN_ALIGN_BYTES==16) + if(EIGEN_ALIGN_BYTES>=16) { VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); } for(int b=8; b::Vectori >(DefaultTraversal,CompleteUnrolling))); VERIFY((test_assign(Matrix11(), Matrix()*Matrix(), - PacketSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD?DefaultTraversal:InnerVectorizedTraversal, CompleteUnrolling))); + InnerVectorizedTraversal, CompleteUnrolling))); #endif VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3), diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt index e06f1238b..6faf4585d 100644 --- a/unsupported/Eigen/CMakeLists.txt +++ b/unsupported/Eigen/CMakeLists.txt @@ -9,3 +9,4 @@ install(FILES ) add_subdirectory(src) +add_subdirectory(CXX11) \ No newline at end of file diff --git a/unsupported/Eigen/CXX11/CMakeLists.txt b/unsupported/Eigen/CXX11/CMakeLists.txt new file mode 100644 index 000000000..f1d9f0482 --- /dev/null +++ b/unsupported/Eigen/CXX11/CMakeLists.txt @@ -0,0 +1,8 @@ +set(Eigen_CXX11_HEADERS Core Tensor TensorSymmetry) + +install(FILES + ${Eigen_CXX11_HEADERS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel + ) + +add_subdirectory(src) diff --git a/unsupported/Eigen/CXX11/src/CMakeLists.txt b/unsupported/Eigen/CXX11/src/CMakeLists.txt new file mode 100644 index 000000000..d90ee1b0f --- /dev/null +++ b/unsupported/Eigen/CXX11/src/CMakeLists.txt @@ -0,0 +1,3 @@ +add_subdirectory(Core) +add_subdirectory(Tensor) +add_subdirectory(TensorSymmetry) diff --git a/unsupported/Eigen/CXX11/src/Core/CMakeLists.txt b/unsupported/Eigen/CXX11/src/Core/CMakeLists.txt new file mode 100644 index 000000000..28571dcb9 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Core/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(util) diff --git a/unsupported/Eigen/CXX11/src/Core/util/CMakeLists.txt b/unsupported/Eigen/CXX11/src/Core/util/CMakeLists.txt new file mode 100644 index 000000000..1e3b14712 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Core/util/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_CXX11_Core_util_SRCS "*.h") + +INSTALL(FILES + ${Eigen_CXX11_Core_util_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/Core/util COMPONENT Devel + ) diff --git a/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt b/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt new file mode 100644 index 000000000..6d4b3ea0d --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_CXX11_Tensor_SRCS "*.h") + +INSTALL(FILES + ${Eigen_CXX11_Tensor_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/Tensor COMPONENT Devel + ) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index b2b28826a..ca6681a1f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -526,48 +526,101 @@ class TensorBase : public TensorBase + const TensorLayoutSwapOp swap_layout() const { + return TensorLayoutSwapOp(derived()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorLayoutSwapOp + swap_layout() { return TensorLayoutSwapOp(derived()); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConcatenationOp + concatenate(const OtherDerived& other, const Axis& axis) const { + return TensorConcatenationOp(derived(), other, axis); + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp - concatenate(const OtherDerived& other, const Axis& axis) const { - return TensorConcatenationOp(derived(), other.derived(), axis); + concatenate(const OtherDerived& other, const Axis& axis) { + return TensorConcatenationOp(derived(), other, axis); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReshapingOp + reshape(const NewDimensions& newDimensions) const { + return TensorReshapingOp(derived(), newDimensions); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp - reshape(const NewDimensions& newDimensions) const { + reshape(const NewDimensions& newDimensions) { return TensorReshapingOp(derived(), newDimensions); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) const { + return TensorSlicingOp(derived(), startIndices, sizes); + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp - slice(const StartIndices& startIndices, const Sizes& sizes) const { + slice(const StartIndices& startIndices, const Sizes& sizes) { return TensorSlicingOp(derived(), startIndices, sizes); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorChippingOp + const TensorChippingOp chip(const Index offset) const { + return TensorChippingOp(derived(), offset, DimId); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset) { return TensorChippingOp(derived(), offset, DimId); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset, const Index dim) const { + return TensorChippingOp(derived(), offset, dim); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp - chip(const Index offset, const Index dim) const { + chip(const Index offset, const Index dim) { return TensorChippingOp(derived(), offset, dim); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReverseOp + reverse(const ReverseDimensions& rev) const { + return TensorReverseOp(derived(), rev); + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp - reverse(const ReverseDimensions& rev) const { + reverse(const ReverseDimensions& rev) { return TensorReverseOp(derived(), rev); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorShufflingOp + shuffle(const Shuffle& shuffle) const { + return TensorShufflingOp(derived(), shuffle); + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp - shuffle(const Shuffle& shuffle) const { + shuffle(const Shuffle& shuffle) { return TensorShufflingOp(derived(), shuffle); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingOp + stride(const Strides& strides) const { + return TensorStridingOp(derived(), strides); + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp - stride(const Strides& strides) const { + stride(const Strides& strides) { return TensorStridingOp(derived(), strides); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 649bdb308..7a67c56b3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -21,8 +21,7 @@ namespace Eigen { * Example: * C.device(EIGEN_GPU) = A + B; * - * Todo: thread pools. - * Todo: operator +=, -=, *= and so on. + * Todo: operator *= and /=. */ template class TensorDevice { @@ -50,6 +49,18 @@ template class TensorDevice { return *this; } + template + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const DeviceType& m_device; ExpressionType& m_expression; @@ -82,6 +93,18 @@ template class TensorDevice + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const ThreadPoolDevice& m_device; ExpressionType& m_expression; @@ -114,6 +137,18 @@ template class TensorDevice return *this; } + template + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + static const bool Vectorize = TensorEvaluator::PacketAccess; + internal::TensorExecutor::run(assign, m_device); + return *this; + } + protected: const GpuDevice& m_device; ExpressionType m_expression; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 38586d067..25f085a59 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -77,7 +77,7 @@ template struct MeanReducer } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * packet_traits::size); + return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits::size); } protected: diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index a9d0f6c39..bdc6ddb87 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -30,14 +30,14 @@ std::ostream& operator << (std::ostream& os, const TensorBase::type Scalar; typedef typename T::Index Index; typedef typename TensorEvaluator, DefaultDevice>::Dimensions Dimensions; - const Index total_size = internal::array_prod(tensor.dimensions()); + const Index total_size = tensor.dimensions().TotalSize(); // Print the tensor as a 1d vector or a 2d matrix. if (internal::array_size::value == 1) { Map > array(const_cast(tensor.data()), total_size); os << array; } else { - const Index first_dim = tensor.dimensions()[0]; + const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); static const int layout = TensorEvaluator, DefaultDevice>::Layout; Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); os << matrix; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 9a35b044d..0745b1742 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -65,7 +65,7 @@ struct traits > static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; enum { Options = Options_, - Flags = compute_tensor_flags::ret | LvalueBit, + Flags = compute_tensor_flags::ret | (is_const::value ? 0 : LvalueBit), }; }; @@ -80,7 +80,7 @@ struct traits > static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; enum { Options = Options_, - Flags = compute_tensor_flags::ret | LvalueBit, + Flags = compute_tensor_flags::ret | (is_const::value ? 0: LvalueBit), }; }; @@ -97,7 +97,7 @@ struct traits > static const int Layout = BaseTraits::Layout; enum { Options = Options_, - Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), + Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), }; }; @@ -113,7 +113,7 @@ struct traits > static const int Layout = BaseTraits::Layout; enum { Options = BaseTraits::Options, - Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), + Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0), }; }; diff --git a/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt b/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt new file mode 100644 index 000000000..6e871a8da --- /dev/null +++ b/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt @@ -0,0 +1,8 @@ +FILE(GLOB Eigen_CXX11_TensorSymmetry_SRCS "*.h") + +INSTALL(FILES + ${Eigen_CXX11_TensorSymmetry_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/TensorSymmetry COMPONENT Devel + ) + +add_subdirectory(util) diff --git a/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt b/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt new file mode 100644 index 000000000..dc9fc78ec --- /dev/null +++ b/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_CXX11_TensorSymmetry_util_SRCS "*.h") + +INSTALL(FILES + ${Eigen_CXX11_TensorSymmetry_util_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/TensorSymmetry/util COMPONENT Devel + ) diff --git a/unsupported/Eigen/MPRealSupport b/unsupported/Eigen/MPRealSupport index 8e42965a3..89036886b 100644 --- a/unsupported/Eigen/MPRealSupport +++ b/unsupported/Eigen/MPRealSupport @@ -141,20 +141,32 @@ int main() public: typedef mpfr::mpreal ResScalar; enum { + Vectorizable = false, + LhsPacketSize = 1, + RhsPacketSize = 1, + ResPacketSize = 1, + NumberOfRegisters = 1, nr = 1, mr = 1, LhsProgress = 1, RhsProgress = 1 }; + typedef ResScalar LhsPacket; + typedef ResScalar RhsPacket; + typedef ResScalar ResPacket; + }; - template - struct gebp_kernel + + + template + struct gebp_kernel { typedef mpfr::mpreal mpreal; EIGEN_DONT_INLINE - void operator()(mpreal* res, Index resStride, const mpreal* blockA, const mpreal* blockB, Index rows, Index depth, Index cols, mpreal alpha, + void operator()(const DataMapper& res, const mpreal* blockA, const mpreal* blockB, + Index rows, Index depth, Index cols, const mpreal& alpha, Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0) { if(rows==0 || cols==0 || depth==0) @@ -170,8 +182,6 @@ int main() { for(Index j=0; j - inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, Scalar& value) + inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, Scalar& value) { line >> i >> j >> value; i--; @@ -31,7 +31,7 @@ namespace internal return false; } template - inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, std::complex& value) + inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, std::complex& value) { Scalar valR, valI; line >> i >> j >> valR >> valI; diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 806ea77b5..f952af752 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -50,7 +50,7 @@ if(MPFR_FOUND) include_directories(${MPFR_INCLUDES} ./mpreal) ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ") set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES}) -# ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" ) + ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" ) else() ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ") endif() diff --git a/unsupported/test/cxx11_tensor_comparisons.cpp b/unsupported/test/cxx11_tensor_comparisons.cpp index 186f56ac3..b1ff8aecb 100644 --- a/unsupported/test/cxx11_tensor_comparisons.cpp +++ b/unsupported/test/cxx11_tensor_comparisons.cpp @@ -54,7 +54,7 @@ static void test_equality() for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { - if (random() < 0.5) { + if (internal::random()) { mat2(i,j,k) = mat1(i,j,k); } } diff --git a/unsupported/test/cxx11_tensor_const.cpp b/unsupported/test/cxx11_tensor_const.cpp index 0ffb02afd..ad9c9da39 100644 --- a/unsupported/test/cxx11_tensor_const.cpp +++ b/unsupported/test/cxx11_tensor_const.cpp @@ -13,8 +13,6 @@ using Eigen::Tensor; - - static void test_simple_assign() { Tensor random(2,3,7); @@ -33,7 +31,32 @@ static void test_simple_assign() } } + +static void test_assign_of_const_tensor() +{ + Tensor random(2,3,7); + random.setRandom(); + + TensorMap > constant1(random.data(), 2, 3, 7); + TensorMap > constant2(random.data(), 2, 3, 7); + const TensorMap > constant3(random.data(), 2, 3, 7); + + Tensor result1 = constant1.chip(0, 2); + Tensor result2 = constant2.chip(0, 2); + Tensor result3 = constant3.chip(0, 2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL((result1(i,j)), random(i,j,0)); + VERIFY_IS_EQUAL((result2(i,j)), random(i,j,0)); + VERIFY_IS_EQUAL((result3(i,j)), random(i,j,0)); + } + } +} + + void test_cxx11_tensor_const() { CALL_SUBTEST(test_simple_assign()); + CALL_SUBTEST(test_assign_of_const_tensor()); } diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp index 695565e9b..8389e9840 100644 --- a/unsupported/test/cxx11_tensor_expr.cpp +++ b/unsupported/test/cxx11_tensor_expr.cpp @@ -260,7 +260,7 @@ static void test_type_casting() mat1.setRandom(); mat2.setRandom(); - mat3 = mat1.template cast(); + mat3 = mat1.cast(); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { @@ -269,7 +269,7 @@ static void test_type_casting() } } - mat3 = mat2.template cast(); + mat3 = mat2.cast(); for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { diff --git a/unsupported/test/cxx11_tensor_ref.cpp b/unsupported/test/cxx11_tensor_ref.cpp index aa369f278..c8f105e3d 100644 --- a/unsupported/test/cxx11_tensor_ref.cpp +++ b/unsupported/test/cxx11_tensor_ref.cpp @@ -196,6 +196,45 @@ static void test_coeff_ref() } +static void test_nested_ops_with_ref() +{ + Tensor t(2, 3, 5, 7); + t.setRandom(); + TensorMap > m(t.data(), 2, 3, 5, 7); + array, 4> paddings; + paddings[0] = std::make_pair(0, 0); + paddings[1] = std::make_pair(2, 1); + paddings[2] = std::make_pair(3, 4); + paddings[3] = std::make_pair(0, 0); + DSizes shuffle_dims(0, 1, 2, 3); + TensorRef > ref(m.pad(paddings)); + array, 4> trivial; + trivial[0] = std::make_pair(0, 0); + trivial[1] = std::make_pair(0, 0); + trivial[2] = std::make_pair(0, 0); + trivial[3] = std::make_pair(0, 0); + Tensor padded = ref.shuffle(shuffle_dims).pad(trivial); + VERIFY_IS_EQUAL(padded.dimension(0), 2+0); + VERIFY_IS_EQUAL(padded.dimension(1), 3+3); + VERIFY_IS_EQUAL(padded.dimension(2), 5+7); + VERIFY_IS_EQUAL(padded.dimension(3), 7+0); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 6; ++j) { + for (int k = 0; k < 12; ++k) { + for (int l = 0; l < 7; ++l) { + if (j >= 2 && j < 5 && k >= 3 && k < 8) { + VERIFY_IS_EQUAL(padded(i,j,k,l), t(i,j-2,k-3,l)); + } else { + VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f); + } + } + } + } + } +} + + void test_cxx11_tensor_ref() { CALL_SUBTEST(test_simple_lvalue_ref()); @@ -205,4 +244,5 @@ void test_cxx11_tensor_ref() CALL_SUBTEST(test_ref_of_ref()); CALL_SUBTEST(test_ref_in_expr()); CALL_SUBTEST(test_coeff_ref()); + CALL_SUBTEST(test_nested_ops_with_ref()); } diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index dddda7dd9..7d6f4e79f 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -57,7 +57,8 @@ #include // Options -#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC. +// FIXME HAVE_INT64_SUPPORT leads to clashes with long int and int64_t on some systems. +//#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC. #define MPREAL_HAVE_MSVC_DEBUGVIEW // Enable Debugger Visualizer for "Debug" builds in MSVC. #define MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Enable extended std::numeric_limits specialization. // Meaning that "digits", "round_style" and similar members are defined as functions, not constants.