Pulled latest update from the eigen main codebase

Benoit Steiner 2015-03-24 13:12:14 -07:00
commit d3f7915aeb
104 changed files with 3294 additions and 694 deletions

View File

@ -310,6 +310,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/NEON/PacketMath.h"
#include "src/Core/arch/NEON/MathFunctions.h"
#include "src/Core/arch/NEON/Complex.h"
#include "src/Core/arch/NEON/BlockingSizesLookupTables.h"
#endif
#if defined EIGEN_VECTORIZE_CUDA
@ -383,6 +384,7 @@ using std::ptrdiff_t;
#include "src/Core/Inverse.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/products/LookupBlockingSizesTable.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/Parallelizer.h"
#include "src/Core/ProductEvaluators.h"

View File

@ -12,24 +12,26 @@
* This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a square matrix, usually very large and sparse.
* Those solvers are accessible via the following classes:
* - ConjugateGradient for selfadjoint (hermitian) matrices,
* - LeastSquaresConjugateGradient for rectangular least-square problems,
* - BiCGSTAB for general square matrices.
*
* These iterative solvers are associated with some preconditioners:
* - IdentityPreconditioner - not really useful
* - DiagonalPreconditioner - also called Jacobi preconditioner, works very well on diagonally dominant matrices.
* - IncompleteILUT - incomplete LU factorization with dual thresholding
* - IncompleteLUT - incomplete LU factorization with dual thresholding
*
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
*
* \code
* #include <Eigen/IterativeLinearSolvers>
* \endcode
\code
#include <Eigen/IterativeLinearSolvers>
\endcode
*/
#include "src/IterativeLinearSolvers/SolveWithGuess.h"
#include "src/IterativeLinearSolvers/IterativeSolverBase.h"
#include "src/IterativeLinearSolvers/BasicPreconditioners.h"
#include "src/IterativeLinearSolvers/ConjugateGradient.h"
#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h"
#include "src/IterativeLinearSolvers/BiCGSTAB.h"
#include "src/IterativeLinearSolvers/IncompleteLUT.h"

View File

@ -11,9 +11,9 @@
* - \ref SparseQR_Module
* - \ref IterativeLinearSolvers_Module
*
* \code
* #include <Eigen/Sparse>
* \endcode
\code
#include <Eigen/Sparse>
\endcode
*/
#include "SparseCore"

View File

@ -226,6 +226,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
/** \internal
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@ -424,6 +429,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();

View File

@ -170,6 +170,12 @@ template<typename _MatrixType, int _UpLo> class LLT
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
/** \internal
* Used to compute and store L
* The strict upper part is not used and not even initialized.
@ -377,6 +383,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);

View File

@ -647,11 +647,15 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ),
Flags0 = evaluator<PlainObjectType>::Flags,
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
@ -717,7 +721,10 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
@ -825,12 +832,16 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;
EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
// TODO: should check for smaller packet types once we can handle multi-sized packet types
const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar);
EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes)
// FIXME this should be an internal assertion
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned");
}
};

View File

@ -34,14 +34,35 @@ void check_static_allocation_size()
#endif
}
template<typename T, int Size, typename Packet = typename packet_traits<T>::type,
bool Match = bool((Size%unpacket_traits<Packet>::size)==0),
bool TryHalf = bool(int(unpacket_traits<Packet>::size) > Size)
&& bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) >
struct compute_default_alignment
{
enum { value = 0 };
};
template<typename T, int Size, typename Packet>
struct compute_default_alignment<T, Size, Packet, true, false> // Match
{
enum { value = sizeof(T) * unpacket_traits<Packet>::size };
};
template<typename T, int Size, typename Packet>
struct compute_default_alignment<T, Size, Packet, false, true>
{
// current packet too large, try with a half-packet
enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value };
};
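An editorial worked example (not part of the diff) of how the recursion above resolves, assuming AVX where packet_traits<float>::type is Packet8f (size 8) and its half is Packet4f (size 4):

// compute_default_alignment<float,8>          : 8 % 8 == 0           -> value = 4*8 = 32
// compute_default_alignment<float,4>          : 4 % 8 != 0, try half -> recurse on Packet4f
// compute_default_alignment<float,4,Packet4f> : 4 % 4 == 0           -> value = 4*4 = 16
// compute_default_alignment<float,3>          : no packet divides 3  -> value = 0 (no alignment)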
/** \internal
* Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
* to a 16-byte boundary if the total size is a multiple of 16 bytes.
*/
template <typename T, int Size, int MatrixOrArrayOptions,
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
: (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
: 0 >
: compute_default_alignment<T,Size>::value >
struct plain_array
{
T array[Size];
@ -81,14 +102,71 @@ struct plain_array
#endif
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
struct plain_array<T, Size, MatrixOrArrayOptions, 8>
{
EIGEN_USER_ALIGN_DEFAULT T array[Size];
EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 32>
{
EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 64>
{
EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
check_static_allocation_size<T,Size>();
}

View File

@ -473,48 +473,48 @@ struct random_default_impl<Scalar, false, false>
};
enum {
floor_log2_terminate,
floor_log2_move_up,
floor_log2_move_down,
floor_log2_bogus
meta_floor_log2_terminate,
meta_floor_log2_move_up,
meta_floor_log2_move_down,
meta_floor_log2_bogus
};
template<unsigned int n, int lower, int upper> struct floor_log2_selector
template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
{
enum { middle = (lower + upper) / 2,
value = (upper <= lower + 1) ? int(floor_log2_terminate)
: (n < (1 << middle)) ? int(floor_log2_move_down)
: (n==0) ? int(floor_log2_bogus)
: int(floor_log2_move_up)
value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
: (n < (1 << middle)) ? int(meta_floor_log2_move_down)
: (n==0) ? int(meta_floor_log2_bogus)
: int(meta_floor_log2_move_up)
};
};
template<unsigned int n,
int lower = 0,
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
int selector = floor_log2_selector<n, lower, upper>::value>
struct floor_log2 {};
int selector = meta_floor_log2_selector<n, lower, upper>::value>
struct meta_floor_log2 {};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_down>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
{
enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_up>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
{
enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_terminate>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
{
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_bogus>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
{
// no value, error at compile time
};
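A brief editorial note (not from the diff) on what the renamed meta-program computes: meta_floor_log2<n>::value is floor(log2(n)), obtained by a compile-time binary search on the bit position. For instance:

// meta_floor_log2<1000>::value  == 9   (2^9 = 512 <= 1000 < 1024 = 2^10)
// meta_floor_log2<32768>::value == 15  -> rand_bits below when RAND_MAX == 32767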
@ -522,11 +522,24 @@ struct floor_log2<n, lower, upper, floor_log2_bogus>
template<typename Scalar>
struct random_default_impl<Scalar, false, true>
{
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1)));
{
using std::max;
using std::min;
typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
if(y<x)
return x;
std::size_t range = ScalarX(y)-ScalarX(x);
std::size_t offset = 0;
// rejection sampling
std::size_t divisor = (range+RAND_MAX-1)/(range+1);
std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);
do {
offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
} while (offset > range);
return Scalar(ScalarX(x) + offset);
}
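// Editorial worked example (not part of the diff), assuming RAND_MAX == 32767, x == 0, y == 99:
//   range      = 99
//   divisor    = (99 + 32766) / 100   = 328
//   multiplier = (99 + 32766) / 32767 = 1
//   offset     = rand() / 328, which lies in [0, 99] because 328 * 100 > 32767,
//   so every draw is accepted; larger ranges use multiplier > 1 and rely on the rejection loop.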
static inline Scalar run()
@ -534,7 +547,7 @@ struct random_default_impl<Scalar, false, true>
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0

View File

@ -105,7 +105,8 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
OuterStrideMatch = Derived::IsVectorAtCompileTime
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits<Derived>::Flags&AlignedBit)==AlignedBit),
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
};
typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
};
@ -184,9 +185,11 @@ protected:
template<typename PlainObjectType, int Options, typename StrideType> class Ref
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
{
private:
typedef internal::traits<Ref> Traits;
template<typename Derived>
EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr);
EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
public:
typedef RefBase<Ref> Base;
@ -195,13 +198,15 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived>
EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr)
EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
{
EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
Base::construct(expr.derived());
}
template<typename Derived>
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
#else
template<typename Derived>
inline Ref(DenseBase<Derived>& expr)
@ -228,7 +233,8 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
template<typename Derived>
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr)
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
{
// std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";

View File

@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(cons
}
#endif
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) {
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
}
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) {
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
return make_double2(from[0*stride], from[1*stride]);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) {
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
to[stride*2] = from.z;
to[stride*3] = from.w;
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) {
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
}
@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a)
}
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w));
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
}
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
return make_double2(abs(a.x), abs(a.y));
return make_double2(fabs(a.x), fabs(a.y));
}
template<> EIGEN_DEVICE_FUNC inline void
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
@ -279,7 +279,7 @@ ptranspose(PacketBlock<float4,4>& kernel) {
kernel.packet[3].z = tmp;
}
template<> EIGEN_DEVICE_FUNC inline void
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<double2,2>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;

View File

@ -0,0 +1,110 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
#define EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
namespace Eigen {
namespace internal {
/* The following lookup table was generated from measurements on a Nexus 5,
* which has a Qualcomm Krait 400 CPU. This is very representative of current
* 32bit (ARMv7) Android devices. On the other hand, I don't know how
* representative that is outside of these conditions. Accordingly,
* let's only use this lookup table on ARM 32bit on Android for now.
*
* Measurements were single-threaded, with Scalar=float, compiled with
* -mfpu=neon-vfpv4, so the pmadd instruction used was VFMA.F32.
*
* The device was cooled, allowing it to run at the max clock speed throughout.
* This may not be representative of real-world thermal conditions.
*
* The benchmark attempted to flush caches to test cold-cache performance.
*/
#if EIGEN_ARCH_ARM && EIGEN_OS_ANDROID
template<>
struct BlockingSizesLookupTable<float, float> {
static const size_t BaseSize = 16;
static const size_t NumSizes = 8;
static const unsigned short* Data() {
static const unsigned short data[512] = {
0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447,
0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456,
0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467,
0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x476, 0x478,
0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x476, 0x476,
0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x488,
0x474, 0x475, 0x476, 0x4a6, 0x496, 0x496, 0x495, 0x4a6,
0x474, 0x475, 0x466, 0x4a6, 0x497, 0x4a5, 0x496, 0x4a5,
0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b,
0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587,
0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x596, 0x597,
0x574, 0x565, 0x566, 0x596, 0x596, 0x5a6, 0x5a6, 0x5a6,
0x564, 0x565, 0x5a6, 0x596, 0x5a6, 0x5a6, 0x5a6, 0x5a6,
0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b,
0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b,
0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667,
0x654, 0x665, 0x676, 0x677, 0x678, 0x679, 0x67a, 0x675,
0x684, 0x675, 0x686, 0x687, 0x688, 0x688, 0x687, 0x686,
0x664, 0x685, 0x666, 0x677, 0x697, 0x696, 0x697, 0x697,
0x664, 0x665, 0x696, 0x696, 0x685, 0x6a6, 0x696, 0x696,
0x664, 0x675, 0x686, 0x696, 0x6a6, 0x696, 0x696, 0x696,
0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x747,
0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757,
0x764, 0x765, 0x756, 0x767, 0x768, 0x759, 0x75a, 0x766,
0x744, 0x755, 0x766, 0x777, 0x768, 0x759, 0x778, 0x777,
0x744, 0x745, 0x766, 0x777, 0x788, 0x786, 0x786, 0x788,
0x754, 0x755, 0x766, 0x787, 0x796, 0x796, 0x787, 0x796,
0x684, 0x695, 0x696, 0x6a6, 0x795, 0x786, 0x795, 0x796,
0x684, 0x695, 0x696, 0x795, 0x786, 0x796, 0x795, 0x796,
0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x848, 0x848,
0x844, 0x855, 0x846, 0x847, 0x848, 0x849, 0x855, 0x857,
0x844, 0x845, 0x846, 0x857, 0x848, 0x859, 0x866, 0x865,
0x844, 0x855, 0x846, 0x847, 0x878, 0x859, 0x877, 0x877,
0x844, 0x855, 0x846, 0x867, 0x886, 0x887, 0x885, 0x886,
0x784, 0x785, 0x786, 0x877, 0x897, 0x885, 0x896, 0x896,
0x684, 0x695, 0x686, 0x886, 0x885, 0x885, 0x886, 0x896,
0x694, 0x6a5, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896,
0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x847, 0x848,
0x954, 0x855, 0x856, 0x947, 0x858, 0x857, 0x858, 0x858,
0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x867, 0x867,
0x944, 0x975, 0x976, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x785, 0x886, 0x887, 0x886, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x796, 0x887, 0x897, 0x896, 0x896,
0x684, 0x695, 0x6a6, 0x886, 0x886, 0x896, 0x896, 0x896,
0x6a4, 0x6a5, 0x696, 0x896, 0x886, 0x896, 0x896, 0x896,
0xa44, 0xa45, 0xa46, 0xa47, 0x847, 0x848, 0x847, 0x848,
0xa44, 0xa45, 0x856, 0x857, 0x857, 0x857, 0x857, 0x857,
0xa44, 0xa65, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867,
0x774, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897,
0x684, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896,
0x684, 0x6a5, 0x6a5, 0x886, 0x886, 0x896, 0x896, 0x896,
0xb44, 0x845, 0x846, 0x847, 0x847, 0x945, 0x846, 0x946,
0xb54, 0x855, 0x856, 0x857, 0x857, 0x856, 0x857, 0x856,
0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x866, 0x867,
0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x885, 0x886, 0x787, 0x887, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x796, 0x886, 0x897, 0x897, 0x897,
0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896,
0x684, 0x685, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896
};
return data;
}
};
#endif
}
}
#endif // EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H

View File

@ -272,7 +272,7 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
}
//---------- double ----------
#if EIGEN_ARCH_ARM64
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000);

View File

@ -518,7 +518,19 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
}
//---------- double ----------
#if EIGEN_ARCH_ARM64
// Clang 3.5 in the iOS toolchain has an ICE triggered by NEON intrinsics for double.
// Confirmed at least with __apple_build_version__ = 6000054.
#ifdef __apple_build_version__
// Let's hope that by the time __apple_build_version__ hits the 601* range, the bug will be fixed.
// https://gist.github.com/yamaya/2924292 suggests that the first 3 digits are only updated with
// major toolchain updates.
#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
#else
#define EIGEN_APPLE_DOUBLE_NEON_BUG 0
#endif
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
#if (EIGEN_COMP_GNUC_STRICT && defined(__ANDROID__)) || defined(__apple_build_version__)
// Bug 907: workaround missing declarations of the following two functions in the ADK
@ -541,12 +553,12 @@ typedef float64x1_t Packet1d;
template<> struct packet_traits<double> : default_packet_traits
{
typedef Packet2d type;
typedef Packet1d half;
typedef Packet2d half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 2,
HasHalfPacket=1,
HasHalfPacket=0,
HasDiv = 1,
// FIXME check the Has*

View File

@ -55,6 +55,34 @@ struct functor_traits<scalar_abs_op<Scalar> >
};
};
/** \internal
* \brief Template functor to compute the score of a scalar, to choose a pivot
*
* \sa class CwiseUnaryOp
*/
template<typename Scalar> struct scalar_score_coeff_op : scalar_abs_op<Scalar>
{
typedef void Score_is_abs;
};
template<typename Scalar>
struct functor_traits<scalar_score_coeff_op<Scalar> > : functor_traits<scalar_abs_op<Scalar> > {};
/* Avoid recomputing abs when we know the score and they are the same. Not a true Eigen functor. */
template<typename Scalar, typename=void> struct abs_knowing_score
{
EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
typedef typename NumTraits<Scalar>::Real result_type;
template<typename Score>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { using std::abs; return abs(a); }
};
template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>
{
EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
typedef typename NumTraits<Scalar>::Real result_type;
template<typename Scal>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; }
};
/** \internal
* \brief Template functor to compute the squared absolute value of a scalar
*

View File

@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff
return a<=0 ? b : a;
}
#if EIGEN_ARCH_i386_OR_x86_64
const std::ptrdiff_t defaultL1CacheSize = 32*1024;
const std::ptrdiff_t defaultL2CacheSize = 256*1024;
const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
#else
const std::ptrdiff_t defaultL1CacheSize = 16*1024;
const std::ptrdiff_t defaultL2CacheSize = 512*1024;
const std::ptrdiff_t defaultL3CacheSize = 512*1024;
#endif
/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
{
static bool m_cache_sizes_initialized = false;
static std::ptrdiff_t m_l1CacheSize = 32*1024;
static std::ptrdiff_t m_l2CacheSize = 256*1024;
static std::ptrdiff_t m_l3CacheSize = 2*1024*1024;
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
static std::ptrdiff_t m_l3CacheSize = 0;
if(!m_cache_sizes_initialized)
{
int l1CacheSize, l2CacheSize, l3CacheSize;
queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024);
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024);
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024);
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
m_cache_sizes_initialized = true;
}
@ -64,43 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
}
}
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depend on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
/* Helper for computeProductBlockingSizes.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depend on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
EIGEN_UNUSED_VARIABLE(num_threads);
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
if (k > kr) k -= k % kr;
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
if (m > mr) m -= m % mr;
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
if (n > nr) n -= n % nr;
return;
#endif
// Explanations:
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
@ -155,7 +145,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
// In unit tests we do not want to use extra large matrices,
// so we reduce the cache size to check the blocking strategy is not flawed
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
l1 = 4*1024;
l1 = 9*1024;
l2 = 32*1024;
l3 = 512*1024;
#endif
@ -164,7 +154,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
// Perhaps it would make more sense to consider k*n*m??
// Note that for very tiny problem, this function should be bypassed anyway
// because we use the coefficient-based implementation for them.
if(std::max(k,std::max(m,n))<48)
if((std::max)(k,(std::max)(m,n))<48)
return;
typedef typename Traits::ResScalar ResScalar;
@ -211,8 +201,22 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
// Here, nc is chosen such that a block of kc x nc of the rhs fits within half of L2.
// The second half is implicitly reserved to access the result and lhs coefficients.
// When k<max_kc, then nc can grow arbitrarily. In practice, it seems to be fruitful
// to limit this growth: we bound nc to grow by a factor of 1.5, leading to:
const Index max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
// to limit this growth: we bound nc to grow by a factor of 1.5.
// However, if the entire lhs block fits within L1, then we are not going to block on the rows at all,
// and it becomes fruitful to keep the packed rhs blocks in L1 if there is enough remaining space.
Index max_nc;
const Index lhs_bytes = m * k * sizeof(LhsScalar);
const Index remaining_l1 = l1- k_sub - lhs_bytes;
if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
{
// L1 blocking
max_nc = remaining_l1 / (k*sizeof(RhsScalar));
}
else
{
// L2 blocking
max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
}
// WARNING Below, we assume that Traits::nr is a power of two.
Index nc = std::min<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
if(n>nc)
@ -228,6 +232,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
{
// So far, no blocking at all, i.e., kc==k, and nc==n.
// In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
// TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
Index problem_size = k*n*sizeof(LhsScalar);
Index actual_lm = actual_l2;
Index max_mc = m;
@ -254,6 +259,60 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
}
}
inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
{
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
return true;
}
#else
EIGEN_UNUSED_VARIABLE(k)
EIGEN_UNUSED_VARIABLE(m)
EIGEN_UNUSED_VARIABLE(n)
#endif
return false;
}
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms.
*
* The blocking size parameters may be evaluated:
* - either by a heuristic based on cache sizes;
* - or using a precomputed lookup table;
* - or using fixed prescribed values (for testing purposes).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
if (!useSpecificBlockingSizes(k, m, n)) {
if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) {
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
}
}
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
if (k > kr) k -= k % kr;
if (m > mr) m -= m % mr;
if (n > nr) n -= n % nr;
}
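As an editorial illustration (not part of this commit), a hedged sketch of querying the blocking sizes through the convenience overload declared just below; Eigen::Index stands for the library's index type here:

// Query the blocking that would be used for a 1024x512 times 512x2048 float product.
Eigen::Index k = 512, m = 1024, n = 2048;
Eigen::internal::computeProductBlockingSizes<float, float>(k, m, n);
// On return, k, m, n hold kc, mc, nc, each rounded down to a multiple of kr, mr, nr.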
template<typename LhsScalar, typename RhsScalar>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
@ -949,19 +1008,31 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// This corresponds to 3*LhsProgress x nr register blocks.
// Usually, makes sense only with FMA
if(mr>=3*Traits::LhsProgress)
{
{
PossiblyRotatingKernelHelper<gebp_kernel> possiblyRotatingKernelHelper(traits);
// loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth)
for(Index i=0; i<peeled_mc3; i+=3*Traits::LhsProgress)
// Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)
// and on each largest micro vertical panel of the rhs (depth * nr).
// Blocking sizes, i.e., 'depth', have been computed so that the micro horizontal panel of the lhs fits in L1.
// However, if depth is too small, we can extend the number of rows of these horizontal panels.
// This actual number of rows is computed as follows:
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
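// Editorial worked example (not from the diff), assuming float with LhsProgress == 8,
// mr == 3*LhsProgress == 24, nr == 4, depth == 128 and l1 == 32768:
//   numerator   = 32768 - 4*24*4 - 128*4*4 = 30336
//   denominator = 128 * 4 * 24             = 12288
//   actual_panel_rows = 24 * max(1, 30336/12288) = 24 * 2 = 48,
// i.e. each pass over the packed rhs covers two 24-row lhs panels instead of one.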
for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
{
// loops on each largest micro vertical panel of rhs (depth * nr)
const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
for(Index j2=0; j2<packet_cols4; j2+=nr)
{
// We select a 3*Traits::LhsProgress x nr micro block of res which is entirely
for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
{
// We selected a 3*Traits::LhsProgress x nr micro block of res which is entirely
// stored into 3 x nr registers.
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
prefetch(&blA[0]);
// gets res block as register
@ -1093,12 +1164,15 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.acc(C11, alphav, R2);
r3.storePacket(0 * Traits::ResPacketSize, R0);
r3.storePacket(1 * Traits::ResPacketSize, R1);
r3.storePacket(2 * Traits::ResPacketSize, R2);
r3.storePacket(2 * Traits::ResPacketSize, R2);
}
}
// Deal with remaining columns of the rhs
for(Index j2=packet_cols4; j2<cols; j2++)
{
for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
{
// One column at a time
const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
prefetch(&blA[0]);
@ -1169,7 +1243,8 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.acc(C8, alphav, R2);
r0.storePacket(0 * Traits::ResPacketSize, R0);
r0.storePacket(1 * Traits::ResPacketSize, R1);
r0.storePacket(2 * Traits::ResPacketSize, R2);
r0.storePacket(2 * Traits::ResPacketSize, R2);
}
}
}
}
@ -1177,13 +1252,21 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
//---------- Process 2 * LhsProgress rows at once ----------
if(mr>=2*Traits::LhsProgress)
{
// loops on each largest micro horizontal panel of lhs (2*LhsProgress x depth)
for(Index i=peeled_mc3; i<peeled_mc2; i+=2*LhsProgress)
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
{
// loops on each largest micro vertical panel of rhs (depth * nr)
Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
for(Index j2=0; j2<packet_cols4; j2+=nr)
{
// We select a 2*Traits::LhsProgress x nr micro block of res which is entirely
for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
{
// We selected a 2*Traits::LhsProgress x nr micro block of res which is entirely
// stored into 2 x nr registers.
const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
@ -1287,11 +1370,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
r2.storePacket(1 * Traits::ResPacketSize, R1);
r3.storePacket(0 * Traits::ResPacketSize, R2);
r3.storePacket(1 * Traits::ResPacketSize, R3);
}
}
// Deal with remaining columns of the rhs
for(Index j2=packet_cols4; j2<cols; j2++)
{
for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
{
// One column at a time
const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
prefetch(&blA[0]);
@ -1358,6 +1444,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
traits.acc(C4, alphav, R1);
r0.storePacket(0 * Traits::ResPacketSize, R0);
r0.storePacket(1 * Traits::ResPacketSize, R1);
}
}
}
}
@ -1479,17 +1566,17 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
for(Index k=0; k<peeled_kc; k+=pk)
{
EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX1");
RhsPacket B_0;
#define EIGEN_GEBGP_ONESTEP(K) \
do { \
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1"); \
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
traits.madd(A0, B_0, C0, B_0); \
EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1"); \
} while(false);
EIGEN_GEBGP_ONESTEP(0);
@ -1504,7 +1591,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
blB += pk*RhsProgress;
blA += pk*1*Traits::LhsProgress;
EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
EIGEN_ASM_COMMENT("end gebp micro kernel 1pX1");
}
// process remaining peeled loop

View File

@ -457,6 +457,8 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
{
eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
return;
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);

View File

@ -0,0 +1,89 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
namespace Eigen {
namespace internal {
template <typename LhsScalar,
typename RhsScalar,
bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 >
struct LookupBlockingSizesFromTableImpl
{
static bool run(Index&, Index&, Index&, Index)
{
return false;
}
};
inline size_t floor_log2_helper(unsigned short& x, size_t offset)
{
unsigned short y = x >> offset;
if (y) {
x = y;
return offset;
} else {
return 0;
}
}
inline size_t floor_log2(unsigned short x)
{
return floor_log2_helper(x, 8)
+ floor_log2_helper(x, 4)
+ floor_log2_helper(x, 2)
+ floor_log2_helper(x, 1);
}
inline size_t ceil_log2(unsigned short x)
{
return x > 1 ? floor_log2(x - 1) + 1 : 0;
}
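Editorial examples (not part of the diff) of what the two runtime helpers above return:

// floor_log2(12) == 3   (position of the highest set bit of 0b1100)
// ceil_log2(12)  == 4   (floor_log2(11) + 1)
// ceil_log2(16)  == 4, ceil_log2(17) == 5, ceil_log2(1) == 0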
template <typename LhsScalar,
typename RhsScalar>
struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true>
{
static bool run(Index& k, Index& m, Index& n, Index)
{
using std::min;
using std::max;
typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table;
const unsigned short minsize = Table::BaseSize;
const unsigned short maxsize = minsize << (Table::NumSizes - 1);
const unsigned short k_clamped = max<unsigned short>(minsize, min<Index>(k, maxsize));
const unsigned short m_clamped = max<unsigned short>(minsize, min<Index>(m, maxsize));
const unsigned short n_clamped = max<unsigned short>(minsize, min<Index>(n, maxsize));
const size_t k_index = ceil_log2(k_clamped / minsize);
const size_t m_index = ceil_log2(m_clamped / minsize);
const size_t n_index = ceil_log2(n_clamped / minsize);
const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
const unsigned short table_entry = Table::Data()[index];
k = min<Index>(k, 1 << ((table_entry & 0xf00) >> 8));
m = min<Index>(m, 1 << ((table_entry & 0x0f0) >> 4));
n = min<Index>(n, 1 << ((table_entry & 0x00f) >> 0));
return true;
}
};
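For illustration (an editorial sketch, not part of this commit): each 16-bit table entry packs three 4-bit exponents, one per dimension, so run() above simply clamps k, m, n to powers of two read out of the entry.

// Example: table_entry == 0x564 decodes as
//   k = min(k, 1 << 5) = min(k, 32)
//   m = min(m, 1 << 6) = min(m, 64)
//   n = min(n, 1 << 4) = min(n, 16)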
template <typename LhsScalar,
typename RhsScalar>
bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads)
{
return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads);
}
}
}
#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H

View File

@ -214,7 +214,7 @@ class blas_data_mapper {
}
template<typename SubPacket>
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const {
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}

View File

@ -288,6 +288,14 @@ struct stem_function
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef ComplexScalar type(ComplexScalar, int);
};
template <typename LhsScalar,
typename RhsScalar>
struct BlockingSizesLookupTable
{
static const size_t NumSizes = 0;
};
}
} // end namespace Eigen

View File

@ -318,6 +318,9 @@
// Defines the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES,
// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough,
// and 16 bytes alignment is also enough for Vector4f.
#define EIGEN_ALIGN_BYTES 16
#ifdef EIGEN_DONT_ALIGN

View File

@ -159,13 +159,16 @@ class compute_matrix_evaluator_flags
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
aligned_bit =
(
((Options&DontAlign)==0)
&& (
#if EIGEN_ALIGN_STATICALLY
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
#else
0
#endif

View File

@ -234,6 +234,12 @@ template<typename _MatrixType> class ComplexEigenSolver
}
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
EigenvectorType m_eivec;
EigenvalueType m_eivalues;
ComplexSchur<MatrixType> m_schur;
@ -251,6 +257,8 @@ template<typename MatrixType>
ComplexEigenSolver<MatrixType>&
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
check_template_parameters();
// this code is inspired from Jampack
eigen_assert(matrix.cols() == matrix.rows());

View File

@ -299,6 +299,13 @@ template<typename _MatrixType> class EigenSolver
void doComputeEigenvectors();
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
}
MatrixType m_eivec;
EigenvalueType m_eivalues;
bool m_isInitialized;
@ -366,6 +373,8 @@ template<typename MatrixType>
EigenSolver<MatrixType>&
EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
check_template_parameters();
using std::sqrt;
using std::abs;
using numext::isfinite;

View File

@ -263,6 +263,13 @@ template<typename _MatrixType> class GeneralizedEigenSolver
}
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
}
MatrixType m_eivec;
ComplexVectorType m_alphas;
VectorType m_betas;
@ -290,6 +297,8 @@ template<typename MatrixType>
GeneralizedEigenSolver<MatrixType>&
GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
{
check_template_parameters();
using std::sqrt;
using std::abs;
eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());

View File

@ -347,6 +347,11 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
static const int m_maxIterations = 30;
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_eivec;
RealVectorType m_eivalues;
typename TridiagonalizationType::SubDiagonalType m_subdiag;
@ -382,6 +387,8 @@ EIGEN_DEVICE_FUNC
SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
::compute(const MatrixType& matrix, int options)
{
check_template_parameters();
using std::abs;
eigen_assert(matrix.cols() == matrix.rows());
eigen_assert((options&~(EigVecMask|GenEigMask))==0

View File

@ -441,7 +441,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
return internal::quat_product<Architecture::Target, Derived, OtherDerived,
typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::IsAligned && internal::traits<OtherDerived>::IsAligned>::run(*this, other);
(internal::traits<Derived>::IsAligned && internal::traits<OtherDerived>::IsAligned)?Aligned:Unaligned>::run(*this, other);
}
/** \sa operator*(Quaternion) */
@ -646,6 +646,16 @@ inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Der
}
}
// Generic conjugate of a Quaternion
namespace internal {
template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj
{
static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
}
};
}
/** \returns the conjugate of \c *this, which is equal to the multiplicative inverse
* if the quaternion is normalized.
* The conjugate of a quaternion represents the opposite rotation.
@ -656,7 +666,10 @@ template <class Derived>
inline Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::conjugate() const
{
return Quaternion<Scalar>(this->w(),-this->x(),-this->y(),-this->z());
return internal::quat_conj<Architecture::Target, Derived,
typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::IsAligned?Aligned:Unaligned>::run(*this);
}
/** \returns the angle (in radians) between two rotations
@ -667,12 +680,10 @@ template <class OtherDerived>
inline typename internal::traits<Derived>::Scalar
QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
{
using std::acos;
using std::atan2;
using std::abs;
Scalar d = abs(this->dot(other));
if (d>=Scalar(1))
return Scalar(0);
return Scalar(2) * acos(d);
Quaternion<Scalar> d = (*this) * other.conjugate();
return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) );
}

View File

@ -20,23 +20,35 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned>
{
static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000));
Quaternion<float> res;
const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
__m128 a = _a.coeffs().template packet<Aligned>(0);
__m128 b = _b.coeffs().template packet<Aligned>(0);
__m128 flip1 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),
vec4f_swizzle1(b,2,0,1,2)),mask);
__m128 flip2 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),
vec4f_swizzle1(b,0,1,2,1)),mask);
__m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
__m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
pstore(&res.x(),
_mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
_mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
vec4f_swizzle1(b,1,2,0,0))),
_mm_add_ps(flip1,flip2)));
_mm_xor_ps(mask,_mm_add_ps(s1,s2))));
return res;
}
};
template<class Derived, int Alignment>
struct quat_conj<Architecture::SSE, Derived, float, Alignment>
{
static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
{
Quaternion<float> res;
const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0)));
return res;
}
};
template<typename VectorLhs,typename VectorRhs>
struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
{
@ -56,8 +68,8 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
template<class Derived, class OtherDerived>
struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned>
template<class Derived, class OtherDerived, int Alignment>
struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
{
static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{
@ -66,8 +78,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned>
Quaternion<double> res;
const double* a = _a.coeffs().data();
Packet2d b_xy = _b.coeffs().template packet<Aligned>(0);
Packet2d b_zw = _b.coeffs().template packet<Aligned>(2);
Packet2d b_xy = _b.coeffs().template packet<Alignment>(0);
Packet2d b_zw = _b.coeffs().template packet<Alignment>(2);
Packet2d a_xx = pset1<Packet2d>(a[0]);
Packet2d a_yy = pset1<Packet2d>(a[1]);
Packet2d a_zz = pset1<Packet2d>(a[2]);
@ -108,6 +120,20 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned>
}
};
template<class Derived, int Alignment>
struct quat_conj<Architecture::SSE, Derived, double, Alignment>
{
static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
{
Quaternion<double> res;
const __m128d mask0 = _mm_setr_pd(-0.,-0.);
const __m128d mask2 = _mm_setr_pd(-0.,0.);
pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0)));
pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2)));
return res;
}
};
} // end namespace internal
} // end namespace Eigen

View File

@ -17,9 +17,9 @@ namespace Eigen {
*
* This class allows one to approximately solve for A.x = b problems assuming A is a diagonal matrix.
* In other words, this preconditioner neglects all off-diagonal entries and, in Eigen's language, solves for:
* \code
* A.diagonal().asDiagonal() . x = b
* \endcode
\code
A.diagonal().asDiagonal() . x = b
\endcode
*
* \tparam _Scalar the type of the scalar.
*
@ -28,6 +28,7 @@ namespace Eigen {
*
* \note A variant that has yet to be implemented would attempt to preserve the norm of each column.
*
* \sa class LeastSquareDiagonalPreconditioner, class ConjugateGradient
*/
template <typename _Scalar>
class DiagonalPreconditioner
@ -100,6 +101,69 @@ class DiagonalPreconditioner
bool m_isInitialized;
};
/** \ingroup IterativeLinearSolvers_Module
* \brief Jacobi preconditioner for LeastSquaresConjugateGradient
*
* This class allows one to approximately solve for A' A x = A' b problems assuming A' A is a diagonal matrix.
* In other words, this preconditioner neglects all off-diagonal entries and, in Eigen's language, solves for:
\code
(A.adjoint() * A).diagonal().asDiagonal() * x = b
\endcode
*
* \tparam _Scalar the type of the scalar.
*
* The diagonal entries are pre-inverted and stored into a dense vector.
*
* \sa class LeastSquaresConjugateGradient, class DiagonalPreconditioner
*/
template <typename _Scalar>
class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar>
{
typedef _Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef DiagonalPreconditioner<_Scalar> Base;
using Base::m_invdiag;
public:
LeastSquareDiagonalPreconditioner() : Base() {}
template<typename MatType>
explicit LeastSquareDiagonalPreconditioner(const MatType& mat) : Base()
{
compute(mat);
}
template<typename MatType>
LeastSquareDiagonalPreconditioner& analyzePattern(const MatType& )
{
return *this;
}
template<typename MatType>
LeastSquareDiagonalPreconditioner& factorize(const MatType& mat)
{
// Compute the inverse squared-norm of each column of mat
m_invdiag.resize(mat.cols());
for(Index j=0; j<mat.outerSize(); ++j)
{
RealScalar sum = mat.innerVector(j).squaredNorm();
if(sum>0)
m_invdiag(j) = RealScalar(1)/sum;
else
m_invdiag(j) = RealScalar(1);
}
Base::m_isInitialized = true;
return *this;
}
template<typename MatType>
LeastSquareDiagonalPreconditioner& compute(const MatType& mat)
{
return factorize(mat);
}
protected:
};
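An editorial usage sketch (not part of this commit), assuming LeastSquaresConjugateGradient picks LeastSquareDiagonalPreconditioner as its default preconditioner:

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>

// Minimize ||A x - b||^2 for a tall sparse A without forming A' A explicitly.
Eigen::VectorXd solveLeastSquares(const Eigen::SparseMatrix<double>& A,
                                  const Eigen::VectorXd& b)
{
  Eigen::LeastSquaresConjugateGradient<Eigen::SparseMatrix<double> > lscg;
  lscg.compute(A);
  return lscg.solve(b);
}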
/** \ingroup IterativeLinearSolvers_Module
* \brief A naive preconditioner which approximates any matrix as the identity matrix

View File

@ -60,29 +60,29 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
}
VectorType p(n);
p = precond.solve(residual); //initial search direction
p = precond.solve(residual); // initial search direction
VectorType z(n), tmp(n);
RealScalar absNew = numext::real(residual.dot(p)); // the square of the absolute value of r scaled by invM
Index i = 0;
while(i < maxIters)
{
tmp.noalias() = mat * p; // the bottleneck of the algorithm
tmp.noalias() = mat * p; // the bottleneck of the algorithm
Scalar alpha = absNew / p.dot(tmp); // the amount we travel on dir
x += alpha * p; // update solution
residual -= alpha * tmp; // update residue
Scalar alpha = absNew / p.dot(tmp); // the amount we travel on dir
x += alpha * p; // update solution
residual -= alpha * tmp; // update residual
residualNorm2 = residual.squaredNorm();
if(residualNorm2 < threshold)
break;
z = precond.solve(residual); // approximately solve for "A z = residual"
z = precond.solve(residual); // approximately solve for "A z = residual"
RealScalar absOld = absNew;
absNew = numext::real(residual.dot(z)); // update the absolute value of r
RealScalar beta = absNew / absOld; // calculate the Gram-Schmidt value used to create the new search direction
p = z + beta * p; // update search direction
RealScalar beta = absNew / absOld; // calculate the Gram-Schmidt value used to create the new search direction
p = z + beta * p; // update search direction
i++;
}
tol_error = sqrt(residualNorm2 / rhsNorm2);
@ -122,24 +122,24 @@ struct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
* and NumTraits<Scalar>::epsilon() for the tolerance.
*
* This class can be used in the same way as the direct solver classes. Here is a typical usage example:
* \code
* int n = 10000;
* VectorXd x(n), b(n);
* SparseMatrix<double> A(n,n);
* // fill A and b
* ConjugateGradient<SparseMatrix<double> > cg;
* cg.compute(A);
* x = cg.solve(b);
* std::cout << "#iterations: " << cg.iterations() << std::endl;
* std::cout << "estimated error: " << cg.error() << std::endl;
* // update b, and solve again
* x = cg.solve(b);
* \endcode
\code
int n = 10000;
VectorXd x(n), b(n);
SparseMatrix<double> A(n,n);
// fill A and b
ConjugateGradient<SparseMatrix<double> > cg;
cg.compute(A);
x = cg.solve(b);
std::cout << "#iterations: " << cg.iterations() << std::endl;
std::cout << "estimated error: " << cg.error() << std::endl;
// update b, and solve again
x = cg.solve(b);
\endcode
*
* By default the iterations start with x=0 as an initial guess of the solution.
* One can control the start using the solveWithGuess() method.
*
* \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
* \sa class LeastSquaresConjugateGradient, class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
*/
template< typename _MatrixType, int _UpLo, typename _Preconditioner>
class ConjugateGradient : public IterativeSolverBase<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
@ -185,7 +185,7 @@ public:
{
typedef typename internal::conditional<UpLo==(Lower|Upper),
Ref<const MatrixType>&,
SparseSelfAdjointView<const Ref<const MatrixType>, UpLo>
typename Ref<const MatrixType>::template ConstSelfAdjointViewReturnType<UpLo>::Type
>::type MatrixWrapperType;
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;
@ -208,7 +208,7 @@ public:
template<typename Rhs,typename Dest>
void _solve_impl(const MatrixBase<Rhs>& b, Dest& x) const
{
x.setOnes();
x.setZero();
_solve_with_guess_impl(b.derived(),x);
}
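With this change solve() now starts from x = 0 instead of x = 1. Callers who want to warm-start, for instance when re-solving after a small change of the right-hand side, can pass an explicit guess through solveWithGuess(). A small self-contained sketch with a toy SPD matrix:

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>
using namespace Eigen;

int main() {
  const int n = 50;
  SparseMatrix<double> A(n, n);
  for (int i = 0; i < n; ++i) A.insert(i, i) = 2.0;   // toy SPD matrix
  A.makeCompressed();
  VectorXd b = VectorXd::Ones(n), b2 = 2 * b;

  ConjugateGradient<SparseMatrix<double>, Lower|Upper> cg;
  cg.compute(A);
  VectorXd x  = cg.solve(b);               // cold start from x = 0
  VectorXd x2 = cg.solveWithGuess(b2, x);  // warm start from the previous solution
  std::cout << "#iterations (warm): " << cg.iterations() << "\n";
  return 0;
}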

View File

@ -93,21 +93,23 @@ Index QuickSplit(VectorV &row, VectorI &ind, Index ncut)
* alternatively, on GMANE:
* http://comments.gmane.org/gmane.comp.lib.eigen/3302
*/
template <typename _Scalar>
class IncompleteLUT : public SparseSolverBase<IncompleteLUT<_Scalar> >
template <typename _Scalar, typename _StorageIndex = int>
class IncompleteLUT : public SparseSolverBase<IncompleteLUT<_Scalar, _StorageIndex> >
{
protected:
typedef SparseSolverBase<IncompleteLUT<_Scalar> > Base;
typedef SparseSolverBase<IncompleteLUT> Base;
using Base::m_isInitialized;
public:
typedef _Scalar Scalar;
typedef _StorageIndex StorageIndex;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef Matrix<Scalar,Dynamic,1> Vector;
typedef SparseMatrix<Scalar,RowMajor> FactorType;
typedef SparseMatrix<Scalar,ColMajor> PermutType;
typedef typename FactorType::StorageIndex StorageIndex;
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
typedef SparseMatrix<Scalar,RowMajor,StorageIndex> FactorType;
public:
// this typedef is only to export the scalar type and compile-time dimensions to solve_retval
typedef Matrix<Scalar,Dynamic,Dynamic> MatrixType;
IncompleteLUT()
@ -151,7 +153,7 @@ class IncompleteLUT : public SparseSolverBase<IncompleteLUT<_Scalar> >
*
**/
template<typename MatrixType>
IncompleteLUT<Scalar>& compute(const MatrixType& amat)
IncompleteLUT& compute(const MatrixType& amat)
{
analyzePattern(amat);
factorize(amat);
@ -197,8 +199,8 @@ protected:
* Set control parameter droptol
* \param droptol Drop any element whose magnitude is less than this tolerance
**/
template<typename Scalar>
void IncompleteLUT<Scalar>::setDroptol(const RealScalar& droptol)
template<typename Scalar, typename StorageIndex>
void IncompleteLUT<Scalar,StorageIndex>::setDroptol(const RealScalar& droptol)
{
this->m_droptol = droptol;
}
@ -207,15 +209,15 @@ void IncompleteLUT<Scalar>::setDroptol(const RealScalar& droptol)
* Set control parameter fillfactor
* \param fillfactor This is used to compute the number @p fill_in of largest elements to keep on each row.
**/
template<typename Scalar>
void IncompleteLUT<Scalar>::setFillfactor(int fillfactor)
template<typename Scalar, typename StorageIndex>
void IncompleteLUT<Scalar,StorageIndex>::setFillfactor(int fillfactor)
{
this->m_fillfactor = fillfactor;
}
template <typename Scalar>
template <typename Scalar, typename StorageIndex>
template<typename _MatrixType>
void IncompleteLUT<Scalar>::analyzePattern(const _MatrixType& amat)
void IncompleteLUT<Scalar,StorageIndex>::analyzePattern(const _MatrixType& amat)
{
// Compute the Fill-reducing permutation
SparseMatrix<Scalar,ColMajor, StorageIndex> mat1 = amat;
@ -232,9 +234,9 @@ void IncompleteLUT<Scalar>::analyzePattern(const _MatrixType& amat)
m_analysisIsOk = true;
}
template <typename Scalar>
template <typename Scalar, typename StorageIndex>
template<typename _MatrixType>
void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
void IncompleteLUT<Scalar,StorageIndex>::factorize(const _MatrixType& amat)
{
using std::sqrt;
using std::swap;
@ -246,8 +248,8 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
m_lu.resize(n,n);
// Declare Working vectors and variables
Vector u(n) ; // real values of the row -- maximum size is n --
VectorXi ju(n); // column position of the values in u -- maximum size is n
VectorXi jr(n); // Indicate the position of the nonzero elements in the vector u -- A zero location is indicated by -1
VectorI ju(n); // column position of the values in u -- maximum size is n
VectorI jr(n); // Indicate the position of the nonzero elements in the vector u -- A zero location is indicated by -1
// Apply the fill-reducing permutation
eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
@ -398,7 +400,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
sizel = len;
len = (std::min)(sizel, nnzL);
typename Vector::SegmentReturnType ul(u.segment(0, sizel));
typename VectorXi::SegmentReturnType jul(ju.segment(0, sizel));
typename VectorI::SegmentReturnType jul(ju.segment(0, sizel));
internal::QuickSplit(ul, jul, len);
// store the largest m_fill elements of the L part
@ -427,14 +429,13 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
sizeu = len + 1; // +1 to take into account the diagonal element
len = (std::min)(sizeu, nnzU);
typename Vector::SegmentReturnType uu(u.segment(ii+1, sizeu-1));
typename VectorXi::SegmentReturnType juu(ju.segment(ii+1, sizeu-1));
typename VectorI::SegmentReturnType juu(ju.segment(ii+1, sizeu-1));
internal::QuickSplit(uu, juu, len);
// store the largest elements of the U part
for(Index k = ii + 1; k < ii + len; k++)
m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);
}
m_lu.finalize();
m_lu.makeCompressed();
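A typical way to use the now index-type parameterized incomplete LU factorization is as a preconditioner of BiCGSTAB; a hedged sketch with a toy diagonal system (setDroptol() and setFillfactor() are the two tuning knobs documented above):

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>
using namespace Eigen;

int main() {
  const int n = 100;
  SparseMatrix<double> A(n, n);
  for (int i = 0; i < n; ++i) A.insert(i, i) = 4.0;   // toy diagonal system
  A.makeCompressed();
  VectorXd b = VectorXd::Ones(n);

  BiCGSTAB<SparseMatrix<double>, IncompleteLUT<double, int> > solver;
  solver.preconditioner().setDroptol(1e-6);   // drop entries below this magnitude
  solver.preconditioner().setFillfactor(10);  // bound the fill-in kept per row
  solver.compute(A);
  VectorXd x = solver.solve(b);
  std::cout << "#iterations: " << solver.iterations() << "\n";
  return 0;
}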

View File

@ -52,9 +52,9 @@ public:
* this class becomes invalid. Call compute() to update it with the new
* matrix A, or modify a copy of A.
*/
template<typename SparseMatrixDerived>
explicit IterativeSolverBase(const SparseMatrixBase<SparseMatrixDerived>& A)
: mp_matrix(A)
template<typename MatrixDerived>
explicit IterativeSolverBase(const EigenBase<MatrixDerived>& A)
: mp_matrix(A.derived())
{
init();
compute(mp_matrix);
@ -67,8 +67,8 @@ public:
* Currently, this function mostly calls analyzePattern on the preconditioner. In the future
* we might, for instance, implement column reordering for faster matrix-vector products.
*/
template<typename SparseMatrixDerived>
Derived& analyzePattern(const SparseMatrixBase<SparseMatrixDerived>& A)
template<typename MatrixDerived>
Derived& analyzePattern(const EigenBase<MatrixDerived>& A)
{
grab(A.derived());
m_preconditioner.analyzePattern(mp_matrix);
@ -87,8 +87,8 @@ public:
* this class becomes invalid. Call compute() to update it with the new
* matrix A, or modify a copy of A.
*/
template<typename SparseMatrixDerived>
Derived& factorize(const SparseMatrixBase<SparseMatrixDerived>& A)
template<typename MatrixDerived>
Derived& factorize(const EigenBase<MatrixDerived>& A)
{
eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
grab(A.derived());
@ -108,8 +108,8 @@ public:
* this class becomes invalid. Call compute() to update it with the new
* matrix A, or modify a copy of A.
*/
template<typename SparseMatrixDerived>
Derived& compute(const SparseMatrixBase<SparseMatrixDerived>& A)
template<typename MatrixDerived>
Derived& compute(const EigenBase<MatrixDerived>& A)
{
grab(A.derived());
m_preconditioner.compute(mp_matrix);
@ -223,11 +223,11 @@ protected:
m_tolerance = NumTraits<Scalar>::epsilon();
}
template<typename SparseMatrixDerived>
void grab(const SparseMatrixBase<SparseMatrixDerived> &A)
template<typename MatrixDerived>
void grab(const EigenBase<MatrixDerived> &A)
{
mp_matrix.~Ref<const MatrixType>();
::new (&mp_matrix) Ref<const MatrixType>(A);
::new (&mp_matrix) Ref<const MatrixType>(A.derived());
}
void grab(const Ref<const MatrixType> &A)
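The grab() overload above re-seats the stored Ref<const MatrixType> by destroying it in place and reconstructing it with placement new, since a Ref, like a reference member, cannot be re-assigned. A minimal Eigen-free sketch of that rebind idiom (names are illustrative only):

#include <new>

struct Rebinder {
  const double& ref;                      // reference members cannot be reassigned...
  explicit Rebinder(const double& r) : ref(r) {}
  void rebind(const double& r) {
    this->~Rebinder();                    // ...so destroy the current binding in place
    ::new (static_cast<void*>(this)) Rebinder(r);  // and reconstruct it on the same storage
  }
};

int main() {
  double a = 1.0, b = 2.0;
  Rebinder r(a);
  r.rebind(b);                            // r.ref now refers to b
  return &r.ref == &b ? 0 : 1;
}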

View File

@ -0,0 +1,213 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H
#define EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H
namespace Eigen {
namespace internal {
/** \internal Low-level conjugate gradient algorithm for least-square problems
* \param mat The matrix A
* \param rhs The right hand side vector b
* \param x On input an initial solution, on output the computed solution.
* \param precond A preconditioner able to efficiently solve for an
* approximation of A'Ax=b (regardless of b)
* \param iters On input the max number of iterations, on output the number of performed iterations.
* \param tol_error On input the tolerance error, on output an estimation of the relative error.
*/
template<typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
EIGEN_DONT_INLINE
void least_square_conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
const Preconditioner& precond, Index& iters,
typename Dest::RealScalar& tol_error)
{
using std::sqrt;
using std::abs;
typedef typename Dest::RealScalar RealScalar;
typedef typename Dest::Scalar Scalar;
typedef Matrix<Scalar,Dynamic,1> VectorType;
RealScalar tol = tol_error;
Index maxIters = iters;
Index m = mat.rows(), n = mat.cols();
VectorType residual = rhs - mat * x;
VectorType normal_residual = mat.adjoint() * residual;
RealScalar rhsNorm2 = (mat.adjoint()*rhs).squaredNorm();
if(rhsNorm2 == 0)
{
x.setZero();
iters = 0;
tol_error = 0;
return;
}
RealScalar threshold = tol*tol*rhsNorm2;
RealScalar residualNorm2 = normal_residual.squaredNorm();
if (residualNorm2 < threshold)
{
iters = 0;
tol_error = sqrt(residualNorm2 / rhsNorm2);
return;
}
VectorType p(n);
p = precond.solve(normal_residual); // initial search direction
VectorType z(n), tmp(m);
RealScalar absNew = numext::real(normal_residual.dot(p)); // the square of the absolute value of r scaled by invM
Index i = 0;
while(i < maxIters)
{
tmp.noalias() = mat * p;
Scalar alpha = absNew / tmp.squaredNorm(); // the amount we travel on dir
x += alpha * p; // update solution
residual -= alpha * tmp; // update residual
normal_residual = mat.adjoint() * residual; // update residual of the normal equation
residualNorm2 = normal_residual.squaredNorm();
if(residualNorm2 < threshold)
break;
z = precond.solve(normal_residual); // approximately solve for "A'A z = normal_residual"
RealScalar absOld = absNew;
absNew = numext::real(normal_residual.dot(z)); // update the absolute value of r
RealScalar beta = absNew / absOld; // calculate the Gram-Schmidt value used to create the new search direction
p = z + beta * p; // update search direction
i++;
}
tol_error = sqrt(residualNorm2 / rhsNorm2);
iters = i;
}
}
template< typename _MatrixType,
typename _Preconditioner = LeastSquareDiagonalPreconditioner<typename _MatrixType::Scalar> >
class LeastSquaresConjugateGradient;
namespace internal {
template< typename _MatrixType, typename _Preconditioner>
struct traits<LeastSquaresConjugateGradient<_MatrixType,_Preconditioner> >
{
typedef _MatrixType MatrixType;
typedef _Preconditioner Preconditioner;
};
}
/** \ingroup IterativeLinearSolvers_Module
* \brief A conjugate gradient solver for sparse (or dense) least-square problems
*
* This class allows one to solve for A x = b linear problems using an iterative conjugate gradient algorithm.
* The matrix A can be non-symmetric and rectangular, but the matrix A' A should be positive-definite to guarantee stability.
* Otherwise, the SparseLU or SparseQR classes might be preferable.
* The matrix A and the vectors x and b can be either dense or sparse.
*
* \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix.
* \tparam _Preconditioner the type of the preconditioner. Default is LeastSquareDiagonalPreconditioner
*
* The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
* and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
* and NumTraits<Scalar>::epsilon() for the tolerance.
*
* This class can be used in the same way as the direct solver classes. Here is a typical usage example:
\code
int m=1000000, n = 10000;
VectorXd x(n), b(m);
SparseMatrix<double> A(m,n);
// fill A and b
LeastSquaresConjugateGradient<SparseMatrix<double> > lscg;
lscg.compute(A);
x = lscg.solve(b);
std::cout << "#iterations: " << lscg.iterations() << std::endl;
std::cout << "estimated error: " << lscg.error() << std::endl;
// update b, and solve again
x = lscg.solve(b);
\endcode
*
* By default the iterations start with x=0 as an initial guess of the solution.
* One can control the start using the solveWithGuess() method.
*
* \sa class ConjugateGradient, SparseLU, SparseQR
*/
template< typename _MatrixType, typename _Preconditioner>
class LeastSquaresConjugateGradient : public IterativeSolverBase<LeastSquaresConjugateGradient<_MatrixType,_Preconditioner> >
{
typedef IterativeSolverBase<LeastSquaresConjugateGradient> Base;
using Base::mp_matrix;
using Base::m_error;
using Base::m_iterations;
using Base::m_info;
using Base::m_isInitialized;
public:
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef _Preconditioner Preconditioner;
public:
/** Default constructor. */
LeastSquaresConjugateGradient() : Base() {}
/** Initialize the solver with matrix \a A for further \c Ax=b solving.
*
* This constructor is a shortcut for the default constructor followed
* by a call to compute().
*
* \warning this class stores a reference to the matrix A as well as some
* precomputed values that depend on it. Therefore, if \a A is changed
* this class becomes invalid. Call compute() to update it with the new
* matrix A, or modify a copy of A.
*/
explicit LeastSquaresConjugateGradient(const MatrixType& A) : Base(A) {}
~LeastSquaresConjugateGradient() {}
/** \internal */
template<typename Rhs,typename Dest>
void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;
for(Index j=0; j<b.cols(); ++j)
{
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;
typename Dest::ColXpr xj(x,j);
internal::least_square_conjugate_gradient(mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_error);
}
m_isInitialized = true;
m_info = m_error <= Base::m_tolerance ? Success : NoConvergence;
}
/** \internal */
using Base::_solve_impl;
template<typename Rhs,typename Dest>
void _solve_impl(const MatrixBase<Rhs>& b, Dest& x) const
{
x.setZero();
_solve_with_guess_impl(b.derived(),x);
}
};
} // end namespace Eigen
#endif // EIGEN_LEAST_SQUARE_CONJUGATE_GRADIENT_H
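For reference, the loop above is conjugate gradient applied to the normal equations A' A x = A' b without ever forming A' A; only products by A and its adjoint are used. Reusing the A and b of the documentation example above, the result can be cross-checked against a direct normal-equations solve on small problems (sketch only):

LeastSquaresConjugateGradient<SparseMatrix<double> > lscg(A);
VectorXd x_iter = lscg.solve(b);
MatrixXd Ad = MatrixXd(A);                       // dense copy, affordable for small problems only
VectorXd x_direct = (Ad.adjoint() * Ad).ldlt().solve(Ad.adjoint() * b);
std::cout << "difference: " << (x_iter - x_direct).norm() << std::endl;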

View File

@ -390,6 +390,12 @@ template<typename _MatrixType> class FullPivLU
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_lu;
PermutationPType m_p;
PermutationQType m_q;
@ -434,6 +440,8 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
template<typename MatrixType>
FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
// the permutations are stored as int indices, so just to be sure:
eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
@ -459,14 +467,16 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
// biggest coefficient in the remaining bottom-right corner (starting at row k, col k)
Index row_of_biggest_in_corner, col_of_biggest_in_corner;
RealScalar biggest_in_corner;
typedef internal::scalar_score_coeff_op<Scalar> Scoring;
typedef typename Scoring::result_type Score;
Score biggest_in_corner;
biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k)
.cwiseAbs()
.unaryExpr(Scoring())
.maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
col_of_biggest_in_corner += k; // need to add k to them.
if(biggest_in_corner==RealScalar(0))
if(biggest_in_corner==Score(0))
{
// before exiting, make sure to initialize the still uninitialized transpositions
// in a sane state without destroying what we already have.
@ -479,7 +489,8 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
break;
}
if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner;
RealScalar abs_pivot = internal::abs_knowing_score<Scalar>()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner);
if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot;
// Now that we've found the pivot, we need to apply the row/col swaps to
// bring it to the location (k,k).
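The pivot search now ranks candidates with internal::scalar_score_coeff_op and recovers the absolute value of the winner through internal::abs_knowing_score, so the potentially expensive abs() is paid once per step rather than once per candidate. A standalone sketch of that idea for complex numbers (an illustration of the technique, not the Eigen functors):

#include <complex>
#include <vector>
#include <cmath>
#include <cstdio>

int main() {
  std::vector<std::complex<double> > col = { {1, 2}, {3, -1}, {0.5, 4} };
  std::size_t best = 0;
  double best_score = std::norm(col[0]);          // |z|^2: cheap, monotonic in |z|, no sqrt
  for (std::size_t i = 1; i < col.size(); ++i) {
    const double s = std::norm(col[i]);
    if (s > best_score) { best_score = s; best = i; }
  }
  const double pivot_abs = std::sqrt(best_score); // one sqrt for the selected pivot only
  std::printf("pivot index %zu, |pivot| = %f\n", best, pivot_abs);
  return 0;
}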

View File

@ -209,6 +209,12 @@ template<typename _MatrixType> class PartialPivLU
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_lu;
PermutationType m_p;
TranspositionType m_rowsTranspositions;
@ -275,6 +281,8 @@ struct partial_lu_impl
*/
static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
{
typedef scalar_score_coeff_op<Scalar> Scoring;
typedef typename Scoring::result_type Score;
const Index rows = lu.rows();
const Index cols = lu.cols();
const Index size = (std::min)(rows,cols);
@ -286,13 +294,13 @@ struct partial_lu_impl
Index rcols = cols-k-1;
Index row_of_biggest_in_col;
RealScalar biggest_in_corner
= lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
Score biggest_in_corner
= lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col);
row_of_biggest_in_col += k;
row_transpositions[k] = PivIndex(row_of_biggest_in_col);
if(biggest_in_corner != RealScalar(0))
if(biggest_in_corner != Score(0))
{
if(k != row_of_biggest_in_col)
{
@ -423,6 +431,8 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
template<typename MatrixType>
PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
// the row permutation is stored as int indices, so just to be sure:
eigen_assert(matrix.rows()<NumTraits<int>::highest());

View File

@ -137,22 +137,27 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm
degree[i] = len[i]; // degree of node i
}
mark = internal::cs_wclear<StorageIndex>(0, 0, w, n); /* clear w */
elen[n] = -2; /* n is a dead element */
Cp[n] = -1; /* n is a root of assembly tree */
w[n] = 0; /* n is a dead element */
/* --- Initialize degree lists ------------------------------------------ */
for(i = 0; i < n; i++)
{
bool has_diag = false;
for(p = Cp[i]; p<Cp[i+1]; ++p)
if(Ci[p]==i)
{
has_diag = true;
break;
}
d = degree[i];
if(d == 0) /* node i is empty */
if(d == 1) /* node i is empty */
{
elen[i] = -2; /* element i is dead */
nel++;
Cp[i] = -1; /* i is a root of assembly tree */
w[i] = 0;
}
else if(d > dense) /* node i is dense */
else if(d > dense || !has_diag) /* node i is dense or has no structural diagonal element */
{
nv[i] = 0; /* absorb i into element n */
elen[i] = -1; /* node i is dead */
@ -168,6 +173,10 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm
}
}
elen[n] = -2; /* n is a dead element */
Cp[n] = -1; /* n is a root of assembly tree */
w[n] = 0; /* n is a dead element */
while (nel < n) /* while (selecting pivots) do */
{
/* --- Select node of minimum approximate degree -------------------- */

View File

@ -90,11 +90,11 @@ class AMDOrdering
* \note Returns an empty permutation matrix
* \tparam StorageIndex The type of indices of the matrix
*/
template <typename Index>
template <typename StorageIndex>
class NaturalOrdering
{
public:
typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;
/** Compute the permutation vector from a column-major sparse matrix */
template <typename MatrixType>

View File

@ -398,6 +398,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_qr;
HCoeffsType m_hCoeffs;
PermutationType m_colsPermutation;
@ -436,6 +442,8 @@ typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::logAbsDetermina
template<typename MatrixType>
ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
using std::abs;
Index rows = matrix.rows();
Index cols = matrix.cols();

View File

@ -380,6 +380,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_qr;
HCoeffsType m_hCoeffs;
IntDiagSizeVectorType m_rows_transpositions;
@ -419,6 +425,8 @@ typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::logAbsDetermin
template<typename MatrixType>
FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
using std::abs;
Index rows = matrix.rows();
Index cols = matrix.cols();
@ -443,13 +451,15 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons
for (Index k = 0; k < size; ++k)
{
Index row_of_biggest_in_corner, col_of_biggest_in_corner;
RealScalar biggest_in_corner;
typedef internal::scalar_score_coeff_op<Scalar> Scoring;
typedef typename Scoring::result_type Score;
biggest_in_corner = m_qr.bottomRightCorner(rows-k, cols-k)
.cwiseAbs()
.maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
Score score = m_qr.bottomRightCorner(rows-k, cols-k)
.unaryExpr(Scoring())
.maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
row_of_biggest_in_corner += k;
col_of_biggest_in_corner += k;
RealScalar biggest_in_corner = internal::abs_knowing_score<Scalar>()(m_qr(row_of_biggest_in_corner, col_of_biggest_in_corner), score);
if(k==0) biggest = biggest_in_corner;
// if the corner is negligible, then we have less than full rank, and we can finish early

View File

@ -196,6 +196,12 @@ template<typename _MatrixType> class HouseholderQR
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_qr;
HCoeffsType m_hCoeffs;
RowVectorType m_temp;
@ -348,6 +354,8 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c
template<typename MatrixType>
HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
Index rows = matrix.rows();
Index cols = matrix.cols();
Index size = (std::min)(rows,cols);
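All these decompositions now route through check_template_parameters(), so instantiating them with an integer Scalar is rejected at compile time instead of silently producing meaningless results. A small illustration; the commented-out line is the one that would no longer compile:

#include <Eigen/Dense>
using namespace Eigen;

int main() {
  Matrix3d A = Matrix3d::Random();
  HouseholderQR<Matrix3d> qr(A);          // fine: double is a non-integer Scalar
  Vector3d x = qr.solve(Vector3d::Ones());
  // HouseholderQR<Matrix3i> qri(Matrix3i::Identity());  // compute() would now trigger
  //                                                     // EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
  return x.allFinite() ? 0 : 1;
}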

View File

@ -130,9 +130,10 @@ public:
inline Index rank() const
{
using std::abs;
using std::max;
eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
if(m_singularValues.size()==0) return 0;
RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold();
RealScalar premultiplied_threshold = (max)(m_singularValues.coeff(0) * threshold(), (std::numeric_limits<RealScalar>::min)());
Index i = m_nonzeroSingularValues-1;
while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
return i+1;
@ -217,6 +218,12 @@ public:
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
// return true if already allocated
bool allocate(Index rows, Index cols, unsigned int computationOptions) ;
@ -240,7 +247,9 @@ protected:
m_usePrescribedThreshold(false),
m_computationOptions(0),
m_rows(-1), m_cols(-1), m_diagSize(0)
{}
{
check_template_parameters();
}
};
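The rank() change floors the cut-off at the smallest positive normalized value, presumably so that a zero or denormal largest singular value cannot yield a zero threshold under which nothing is ever discarded. Typical rank queries, optionally with a user-chosen relative threshold (small sketch):

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main() {
  MatrixXd A = MatrixXd::Random(6, 4);
  A.col(3) = A.col(0) + A.col(1);          // make the matrix rank-deficient on purpose
  JacobiSVD<MatrixXd> svd(A);              // singular values alone are enough for rank()
  std::cout << "default rank: " << svd.rank() << "\n";
  svd.setThreshold(1e-8);                  // relative to the largest singular value
  std::cout << "thresholded rank: " << svd.rank() << "\n";
  return 0;
}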

View File

@ -69,6 +69,7 @@ class SimplicialCholeskyBase : public SparseSolverBase<Derived>
typedef SparseMatrix<Scalar,ColMajor,StorageIndex> CholMatrixType;
typedef CholMatrixType const * ConstCholMatrixPtr;
typedef Matrix<Scalar,Dynamic,1> VectorType;
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
public:
@ -250,8 +251,8 @@ class SimplicialCholeskyBase : public SparseSolverBase<Derived>
CholMatrixType m_matrix;
VectorType m_diag; // the diagonal coefficients (LDLT mode)
VectorXi m_parent; // elimination tree
VectorXi m_nonZerosPerCol;
VectorI m_parent; // elimination tree
VectorI m_nonZerosPerCol;
PermutationMatrix<Dynamic,Dynamic,StorageIndex> m_P; // the permutation
PermutationMatrix<Dynamic,Dynamic,StorageIndex> m_Pinv; // the inverse permutation

View File

@ -86,7 +86,12 @@ class CompressedStorage
void resize(Index size, double reserveSizeFactor = 0)
{
if (m_allocatedSize<size)
reallocate(size + Index(reserveSizeFactor*double(size)));
{
Index realloc_size = (std::min<Index>)(NumTraits<StorageIndex>::highest(), size + Index(reserveSizeFactor*double(size)));
if(realloc_size<size)
internal::throw_std_bad_alloc();
reallocate(realloc_size);
}
m_size = size;
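The new resize() grows the buffer by the requested factor but clamps the allocation to what StorageIndex can address, and throws if even the plain requested size does not fit. The same pattern in isolation (illustrative names, not the Eigen internals):

#include <algorithm>
#include <cstddef>
#include <new>

std::size_t grown_capacity(std::size_t size, double reserveFactor, std::size_t index_max)
{
  std::size_t wanted = size + static_cast<std::size_t>(reserveFactor * double(size));
  std::size_t capped = (std::min)(index_max, wanted);
  if (capped < size)                 // even the un-grown size overflows the index type
    throw std::bad_alloc();
  return capped;
}

int main() {
  return grown_capacity(1000, 0.5, 65535) == 1500 ? 0 : 1;
}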
}
@ -214,6 +219,9 @@ class CompressedStorage
inline void reallocate(Index size)
{
#ifdef EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN
EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN
#endif
eigen_internal_assert(size!=m_allocatedSize);
internal::scoped_array<Scalar> newValues(size);
internal::scoped_array<StorageIndex> newIndices(size);

View File

@ -49,6 +49,16 @@ public:
return nnz;
}
inline const Scalar coeff(Index row, Index col) const
{
return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
inline const Scalar coeff(Index index) const
{
return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
}
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
@ -204,6 +214,21 @@ public:
}
bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
inline const Scalar coeff(Index row, Index col) const
{
return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
inline const Scalar coeff(Index index) const
{
return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
}
const Scalar& lastCoeff() const
{

View File

@ -222,24 +222,18 @@ class SparseMatrix
* The non zero coefficient must \b not already exist.
*
* If the matrix \c *this is in compressed mode, then \c *this is turned into uncompressed
* mode while reserving room for 2 non zeros per inner vector. It is strongly recommended to first
* call reserve(const SizesType &) to reserve a more appropriate number of elements per
* inner vector that better match your scenario.
* mode while reserving room for 2 x this->innerSize() non-zeros if reserve(Index) has not been called earlier.
* In this case, the insertion procedure is optimized for a \e sequential insertion mode where elements are assumed to be
* inserted by increasing outer-indices.
*
* If that's not the case, then it is strongly recommended to either use a triplet-list to assemble the matrix, or to first
* call reserve(const SizesType &) to reserve the appropriate number of non-zero elements per inner vector.
*
* This function performs a sorted insertion in O(1) if the elements of each inner vector are
* inserted in increasing inner index order, and in O(nnz_j) for a random insertion.
* Assuming memory has been appropriately reserved, this function performs a sorted insertion in O(1)
* if the elements of each inner vector are inserted in increasing inner index order, and in O(nnz_j) for a random insertion.
*
*/
Scalar& insert(Index row, Index col)
{
eigen_assert(row>=0 && row<rows() && col>=0 && col<cols());
if(isCompressed())
{
reserve(IndexVector::Constant(outerSize(), 2));
}
return insertUncompressed(row,col);
}
Scalar& insert(Index row, Index col);
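Following the updated documentation above, the recommended patterns are either triplet-based assembly or a reserve() followed by mostly sequential insert() calls; a brief self-contained sketch of the latter:

#include <Eigen/Sparse>
using namespace Eigen;

int main() {
  const int rows = 100, cols = 100;
  SparseMatrix<double> A(rows, cols);
  A.reserve(VectorXi::Constant(cols, 6));    // per-column estimate of non-zeros
  for (int j = 0; j < cols; ++j)
    for (int i = j; i < rows && i < j + 3; ++i)
      A.insert(i, j) = 1.0;                  // inner indices inserted in increasing order
  A.makeCompressed();                        // switch back to compressed storage
  return A.nonZeros() > 0 ? 0 : 1;
}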
public:
@ -1076,6 +1070,118 @@ EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Opt
}
}
template<typename _Scalar, int _Options, typename _Index>
typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insert(Index row, Index col)
{
eigen_assert(row>=0 && row<rows() && col>=0 && col<cols());
const Index outer = IsRowMajor ? row : col;
const Index inner = IsRowMajor ? col : row;
if(isCompressed())
{
if(nonZeros()==0)
{
// reserve space if not already done
if(m_data.allocatedSize()==0)
m_data.reserve(2*m_innerSize);
// turn the matrix into non-compressed mode
m_innerNonZeros = static_cast<StorageIndex*>(std::malloc(m_outerSize * sizeof(StorageIndex)));
if(!m_innerNonZeros) internal::throw_std_bad_alloc();
memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex));
// pack all inner-vectors to the end of the pre-allocated space
// and allocate the entire free-space to the first inner-vector
StorageIndex end = convert_index(m_data.allocatedSize());
for(Index j=1; j<=m_outerSize; ++j)
m_outerIndex[j] = end;
}
}
// check whether we can do a fast "push back" insertion
Index data_end = m_data.allocatedSize();
// First case: we are filling a new inner vector which is packed at the end.
// We assume that all remaining inner-vectors are also empty and packed to the end.
if(m_outerIndex[outer]==data_end)
{
eigen_internal_assert(m_innerNonZeros[outer]==0);
// pack previous empty inner-vectors to end of the used-space
// and allocate the entire free-space to the current inner-vector.
StorageIndex p = convert_index(m_data.size());
Index j = outer;
while(j>=0 && m_innerNonZeros[j]==0)
m_outerIndex[j--] = p;
// push back the new element
++m_innerNonZeros[outer];
m_data.append(Scalar(0), inner);
// check for reallocation
if(data_end != m_data.allocatedSize())
{
// m_data has been reallocated
// -> move remaining inner-vectors back to the end of the free-space
// so that the entire free-space is allocated to the current inner-vector.
eigen_internal_assert(data_end < m_data.allocatedSize());
StorageIndex new_end = convert_index(m_data.allocatedSize());
for(Index j=outer+1; j<=m_outerSize; ++j)
if(m_outerIndex[j]==data_end)
m_outerIndex[j] = new_end;
}
return m_data.value(p);
}
// Second case: the next inner-vector is packed to the end
// and the end of the current inner-vector matches the used-space.
if(m_outerIndex[outer+1]==data_end && m_outerIndex[outer]+m_innerNonZeros[outer]==m_data.size())
{
eigen_internal_assert(outer+1==m_outerSize || m_innerNonZeros[outer+1]==0);
// add space for the new element
++m_innerNonZeros[outer];
m_data.resize(m_data.size()+1);
// check for reallocation
if(data_end != m_data.allocatedSize())
{
// m_data has been reallocated
// -> move remaining inner-vectors back to the end of the free-space
// so that the entire free-space is allocated to the current inner-vector.
eigen_internal_assert(data_end < m_data.allocatedSize());
StorageIndex new_end = convert_index(m_data.allocatedSize());
for(Index j=outer+1; j<=m_outerSize; ++j)
if(m_outerIndex[j]==data_end)
m_outerIndex[j] = new_end;
}
// and insert it at the right position (sorted insertion)
Index startId = m_outerIndex[outer];
Index p = m_outerIndex[outer]+m_innerNonZeros[outer]-1;
while ( (p > startId) && (m_data.index(p-1) > inner) )
{
m_data.index(p) = m_data.index(p-1);
m_data.value(p) = m_data.value(p-1);
--p;
}
m_data.index(p) = convert_index(inner);
return (m_data.value(p) = 0);
}
if(m_data.size() != m_data.allocatedSize())
{
// make sure the matrix is compatible with random uncompressed insertion:
m_data.resize(m_data.allocatedSize());
this->reserveInnerVectors(Array<StorageIndex,Dynamic,1>::Constant(2*m_outerSize, convert_index(m_outerSize)));
}
return insertUncompressed(row,col);
}
template<typename _Scalar, int _Options, typename _Index>
EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertUncompressed(Index row, Index col)
{

View File

@ -97,7 +97,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
Transpose<const Derived>
>::type AdjointReturnType;
typedef Transpose<Derived> TransposeReturnType;
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
// FIXME storage order do not match evaluator storage order
@ -300,9 +299,14 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
template<int Mode>
inline const TriangularView<const Derived, Mode> triangularView() const;
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SparseSelfAdjointView<Derived, UpLo> Type; };
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SparseSelfAdjointView<const Derived, UpLo> Type; };
template<unsigned int UpLo> inline const SparseSelfAdjointView<const Derived, UpLo> selfadjointView() const;
template<unsigned int UpLo> inline SparseSelfAdjointView<Derived, UpLo> selfadjointView();
template<unsigned int UpLo> inline
typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
template<unsigned int UpLo> inline
typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
template<typename OtherDerived> Scalar dot(const MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived> Scalar dot(const SparseMatrixBase<OtherDerived>& other) const;

View File

@ -169,17 +169,17 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
***************************************************************************/
template<typename Derived>
template<unsigned int Mode>
const SparseSelfAdjointView<const Derived, Mode> SparseMatrixBase<Derived>::selfadjointView() const
template<unsigned int UpLo>
typename SparseMatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type SparseMatrixBase<Derived>::selfadjointView() const
{
return SparseSelfAdjointView<const Derived, Mode>(derived());
return SparseSelfAdjointView<const Derived, UpLo>(derived());
}
template<typename Derived>
template<unsigned int Mode>
SparseSelfAdjointView<Derived, Mode> SparseMatrixBase<Derived>::selfadjointView()
template<unsigned int UpLo>
typename SparseMatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type SparseMatrixBase<Derived>::selfadjointView()
{
return SparseSelfAdjointView<Derived, Mode>(derived());
return SparseSelfAdjointView<Derived, UpLo>(derived());
}
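Moving the return types behind SelfAdjointViewReturnType / ConstSelfAdjointViewReturnType lets generic code, such as the ConjugateGradient wrapper earlier in this commit, spell the type as typename MatrixType::template ConstSelfAdjointViewReturnType<UpLo>::Type for dense and sparse matrices alike. Ordinary usage is unchanged (small sketch):

#include <Eigen/Sparse>
using namespace Eigen;

int main() {
  const int n = 10;
  SparseMatrix<double> A(n, n);
  for (int i = 0; i < n; ++i) A.insert(i, i) = 2.0;   // pretend only the lower triangle is stored
  A.makeCompressed();
  VectorXd x = VectorXd::Ones(n);
  VectorXd y = A.selfadjointView<Lower>() * x;        // symmetric product without copying A
  return y.size() == n ? 0 : 1;
}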
/***************************************************************************

View File

@ -1,3 +1,12 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <iostream>
#include <cstdint>
#include <cstdlib>
@ -7,23 +16,76 @@
#include <string>
#include <cmath>
#include <cassert>
#include <cstring>
#include <memory>
#include <Eigen/Core>
using namespace std;
const int default_precision = 4;
// see --only-cubic-sizes
bool only_cubic_sizes = false;
// see --dump-tables
bool dump_tables = false;
uint8_t log2_pot(size_t x) {
size_t l = 0;
while (x >>= 1) l++;
return l;
}
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
{
return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
}
// just a helper to store a triple of K,M,N sizes for matrix product
struct size_triple_t
{
uint16_t k, m, n;
size_triple_t() : k(0), m(0), n(0) {}
size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
size_triple_t(uint16_t compact)
{
k = 1 << ((compact & 0xf00) >> 8);
m = 1 << ((compact & 0x0f0) >> 4);
n = 1 << ((compact & 0x00f) >> 0);
}
bool is_cubic() const { return k == m && m == n; }
};
ostream& operator<<(ostream& s, const size_triple_t& t)
{
return s << "(" << t.k << ", " << t.m << ", " << t.n << ")";
}
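A quick round-trip check of the 4-bit-per-dimension packing used throughout this tool, relying on the helpers defined just above; it only works because all benchmarked sizes are powers of two below 2^16 (illustration only, not part of the tool):

int main() {
  const uint16_t c = compact_size_triple(256, 512, 1024);   // packs to 0x89a
  const size_triple_t t(c);                                  // decodes back to (256, 512, 1024)
  cout << hex << c << dec << " " << t << endl;
  return (t.k == 256 && t.m == 512 && t.n == 1024) ? 0 : 1;
}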
struct inputfile_entry_t
{
uint16_t product_size;
uint16_t block_size;
uint16_t pot_block_size;
size_triple_t nonpot_block_size;
float gflops;
};
struct inputfile_t
{
enum class type_t {
unknown,
all_pot_sizes,
default_sizes
};
string filename;
vector<inputfile_entry_t> entries;
type_t type;
inputfile_t(const string& fname)
: filename(fname)
, type(type_t::unknown)
{
ifstream stream(filename);
if (!stream.is_open()) {
@ -31,52 +93,102 @@ struct inputfile_t
exit(1);
}
string line;
bool is_in_measurements = false;
while (getline(stream, line)) {
if (line.empty()) continue;
if (line.find("BEGIN MEASUREMENTS") == 0) {
is_in_measurements = true;
if (line.find("BEGIN MEASUREMENTS ALL POT SIZES") == 0) {
if (type != type_t::unknown) {
cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines";
exit(1);
}
type = type_t::all_pot_sizes;
continue;
}
if (!is_in_measurements) {
if (line.find("BEGIN MEASUREMENTS DEFAULT SIZES") == 0) {
if (type != type_t::unknown) {
cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines";
exit(1);
}
type = type_t::default_sizes;
continue;
}
unsigned int product_size, block_size;
float gflops;
int sscanf_result =
sscanf(line.c_str(), "%x %x %f",
&product_size,
&block_size,
&gflops);
if (3 != sscanf_result ||
!product_size ||
product_size > 0xfff ||
!block_size ||
block_size > 0xfff ||
!isfinite(gflops))
{
cerr << "ill-formed input file: " << filename << endl;
cerr << "offending line:" << endl << line << endl;
exit(1);
if (type == type_t::unknown) {
continue;
}
switch(type) {
case type_t::all_pot_sizes: {
unsigned int product_size, block_size;
float gflops;
int sscanf_result =
sscanf(line.c_str(), "%x %x %f",
&product_size,
&block_size,
&gflops);
if (3 != sscanf_result ||
!product_size ||
product_size > 0xfff ||
!block_size ||
block_size > 0xfff ||
!isfinite(gflops))
{
cerr << "ill-formed input file: " << filename << endl;
cerr << "offending line:" << endl << line << endl;
exit(1);
}
if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
continue;
}
inputfile_entry_t entry;
entry.product_size = uint16_t(product_size);
entry.pot_block_size = uint16_t(block_size);
entry.gflops = gflops;
entries.push_back(entry);
break;
}
case type_t::default_sizes: {
unsigned int product_size;
float gflops;
int bk, bm, bn;
int sscanf_result =
sscanf(line.c_str(), "%x default(%d, %d, %d) %f",
&product_size,
&bk, &bm, &bn,
&gflops);
if (5 != sscanf_result ||
!product_size ||
product_size > 0xfff ||
!isfinite(gflops))
{
cerr << "ill-formed input file: " << filename << endl;
cerr << "offending line:" << endl << line << endl;
exit(1);
}
if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
continue;
}
inputfile_entry_t entry;
entry.product_size = uint16_t(product_size);
entry.pot_block_size = 0;
entry.nonpot_block_size = size_triple_t(bk, bm, bn);
entry.gflops = gflops;
entries.push_back(entry);
break;
}
default:
break;
}
inputfile_entry_t entry;
entry.product_size = uint16_t(product_size);
entry.block_size = uint16_t(block_size);
entry.gflops = gflops;
entries.push_back(entry);
}
stream.close();
if (!is_in_measurements) {
cerr << "Input file " << filename << " didn't contain a BEGIN MEASUREMENTS line. Wrong file?" << endl;
if (type == type_t::unknown) {
cerr << "Unrecognized input file " << filename << endl;
exit(1);
}
if (entries.empty()) {
cerr << "didn't find any measurements in input file: " << filename << endl;
exit(1);
}
//cerr << "read " << entries.size() << " measurements from " << filename << endl;
}
};
@ -88,6 +200,11 @@ struct preprocessed_inputfile_entry_t
float efficiency;
};
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2)
{
return e1.efficiency < e2.efficiency;
}
struct preprocessed_inputfile_t
{
string filename;
@ -96,6 +213,9 @@ struct preprocessed_inputfile_t
preprocessed_inputfile_t(const inputfile_t& inputfile)
: filename(inputfile.filename)
{
if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
abort();
}
auto it = inputfile.entries.begin();
auto it_first_with_given_product_size = it;
while (it != inputfile.entries.end()) {
@ -127,7 +247,7 @@ private:
for (auto it = begin; it != end; ++it) {
preprocessed_inputfile_entry_t entry;
entry.product_size = it->product_size;
entry.block_size = it->block_size;
entry.block_size = it->pot_block_size;
entry.efficiency = it->gflops / max_gflops;
entries.push_back(entry);
}
@ -201,14 +321,82 @@ float efficiency_of_subset(
efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry);
}
efficiency = min(efficiency, efficiency_this_product_size);
first_entry_index_with_this_product_size = entry_index;
product_size = first_file.entries[entry_index].product_size;
if (entry_index < num_entries) {
first_entry_index_with_this_product_size = entry_index;
product_size = first_file.entries[entry_index].product_size;
}
}
}
return efficiency;
}
void dump_table_for_subset(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<size_t>& subset)
{
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
const size_t num_entries = first_file.entries.size();
size_t entry_index = 0;
size_t first_entry_index_with_this_product_size = 0;
uint16_t product_size = first_file.entries[0].product_size;
size_t i = 0;
size_triple_t min_product_size(first_file.entries.front().product_size);
size_triple_t max_product_size(first_file.entries.back().product_size);
if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) {
abort();
}
if (only_cubic_sizes) {
cerr << "Can't generate tables with --only-cubic-sizes." << endl;
abort();
}
cout << "struct LookupTable {" << endl;
cout << " static const size_t BaseSize = " << min_product_size.k << ";" << endl;
const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
const size_t TableSize = NumSizes * NumSizes * NumSizes;
cout << " static const size_t NumSizes = " << NumSizes << ";" << endl;
cout << " static const unsigned short* Data() {" << endl;
cout << " static const unsigned short data[" << TableSize << "] = {";
while (entry_index < num_entries) {
++entry_index;
if (entry_index == num_entries ||
first_file.entries[entry_index].product_size != product_size)
{
float best_efficiency_this_product_size = 0.0f;
uint16_t best_block_size_this_product_size = 0;
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
float efficiency_this_entry = 1.0f;
for (auto i = subset.begin(); i != subset.end(); ++i) {
efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
}
if (efficiency_this_entry > best_efficiency_this_product_size) {
best_efficiency_this_product_size = efficiency_this_entry;
best_block_size_this_product_size = first_file.entries[e].block_size;
}
}
if ((i++) % NumSizes) {
cout << " ";
} else {
cout << endl << " ";
}
cout << "0x" << hex << best_block_size_this_product_size << dec;
if (entry_index < num_entries) {
cout << ",";
first_entry_index_with_this_product_size = entry_index;
product_size = first_file.entries[entry_index].product_size;
}
}
}
if (i != TableSize) {
cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl;
abort();
}
cout << endl << " };" << endl;
cout << " return data;" << endl;
cout << " }" << endl;
cout << "};" << endl;
}
float efficiency_of_partition(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<vector<size_t>>& partition)
@ -390,67 +578,299 @@ void print_partition(
for (auto file = subset->begin(); file != subset->end(); ++file) {
cout << " " << preprocessed_inputfiles[*file].filename << endl;
}
if (dump_tables) {
cout << " Table:" << endl;
dump_table_for_subset(preprocessed_inputfiles, *subset);
}
}
cout << endl;
}
int main(int argc, char* argv[])
struct action_t
{
if (argc == 1) {
cerr << "usage: " << argv[0] << " [input files]" << endl;
cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
exit(1);
}
cout.precision(3);
cerr.precision(3);
vector<string> inputfilenames;
for (int i = 1; i < argc; i++) {
inputfilenames.emplace_back(argv[i]);
}
virtual const char* invokation_name() const { abort(); return nullptr; }
virtual void run(const vector<string>&) const { abort(); }
virtual ~action_t() {}
};
vector<preprocessed_inputfile_t> preprocessed_inputfiles;
for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) {
preprocessed_inputfiles.emplace_back(inputfile_t(*it));
}
check_all_files_in_same_exact_order(preprocessed_inputfiles);
float required_efficiency_to_beat = 0.0f;
vector<vector<vector<size_t>>> partitions;
cerr << "searching for partitions...\r" << flush;
while (true)
struct partition_action_t : action_t
{
virtual const char* invokation_name() const override { return "partition"; }
virtual void run(const vector<string>& input_filenames) const override
{
vector<vector<size_t>> partition;
find_partition_with_efficiency_higher_than(
preprocessed_inputfiles,
required_efficiency_to_beat,
partition);
float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
<< " subsets for " << 100.0f * actual_efficiency
<< " % efficiency"
<< " \r" << flush;
partitions.push_back(partition);
if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
break;
vector<preprocessed_inputfile_t> preprocessed_inputfiles;
if (input_filenames.empty()) {
cerr << "The " << invokation_name() << " action needs a list of input files." << endl;
exit(1);
}
required_efficiency_to_beat = actual_efficiency;
}
cerr << " " << endl;
while (true) {
bool repeat = false;
for (size_t i = 0; i < partitions.size() - 1; i++) {
if (partitions[i].size() >= partitions[i+1].size()) {
partitions.erase(partitions.begin() + i);
repeat = true;
for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) {
inputfile_t inputfile(*it);
switch (inputfile.type) {
case inputfile_t::type_t::all_pot_sizes:
preprocessed_inputfiles.emplace_back(inputfile);
break;
case inputfile_t::type_t::default_sizes:
cerr << "The " << invokation_name() << " action only uses measurements for all pot sizes, and "
<< "has no use for " << *it << " which contains measurements for default sizes." << endl;
exit(1);
break;
default:
cerr << "Unrecognized input file: " << *it << endl;
exit(1);
}
}
check_all_files_in_same_exact_order(preprocessed_inputfiles);
float required_efficiency_to_beat = 0.0f;
vector<vector<vector<size_t>>> partitions;
cerr << "searching for partitions...\r" << flush;
while (true)
{
vector<vector<size_t>> partition;
find_partition_with_efficiency_higher_than(
preprocessed_inputfiles,
required_efficiency_to_beat,
partition);
float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
<< " subsets for " << 100.0f * actual_efficiency
<< " % efficiency"
<< " \r" << flush;
partitions.push_back(partition);
if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
break;
}
required_efficiency_to_beat = actual_efficiency;
}
cerr << " " << endl;
while (true) {
bool repeat = false;
for (size_t i = 0; i < partitions.size() - 1; i++) {
if (partitions[i].size() >= partitions[i+1].size()) {
partitions.erase(partitions.begin() + i);
repeat = true;
break;
}
}
if (!repeat) {
break;
}
}
if (!repeat) {
break;
for (auto it = partitions.begin(); it != partitions.end(); ++it) {
print_partition(preprocessed_inputfiles, *it);
}
}
for (auto it = partitions.begin(); it != partitions.end(); ++it) {
print_partition(preprocessed_inputfiles, *it);
};
struct evaluate_defaults_action_t : action_t
{
struct results_entry_t {
uint16_t product_size;
size_triple_t default_block_size;
uint16_t best_pot_block_size;
float default_gflops;
float best_pot_gflops;
float default_efficiency;
};
friend ostream& operator<<(ostream& s, const results_entry_t& entry)
{
return s
<< "Product size " << size_triple_t(entry.product_size)
<< ": default block size " << entry.default_block_size
<< " -> " << entry.default_gflops
<< " GFlop/s = " << entry.default_efficiency * 100.0f << " %"
<< " of best POT block size " << size_triple_t(entry.best_pot_block_size)
<< " -> " << entry.best_pot_gflops
<< " GFlop/s" << dec;
}
static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
return e1.default_efficiency < e2.default_efficiency;
}
virtual const char* invokation_name() const override { return "evaluate-defaults"; }
void show_usage_and_exit() const
{
cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
cerr << "checks how well the performance with default sizes compares to the best "
<< "performance measured over all POT sizes." << endl;
exit(1);
}
virtual void run(const vector<string>& input_filenames) const override
{
if (input_filenames.size() != 2) {
show_usage_and_exit();
}
inputfile_t inputfile_default_sizes(input_filenames[0]);
inputfile_t inputfile_all_pot_sizes(input_filenames[1]);
if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) {
cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl;
show_usage_and_exit();
}
if (inputfile_all_pot_sizes.type != inputfile_t::type_t::all_pot_sizes) {
cerr << inputfile_all_pot_sizes.filename << " is not an input file with all POT sizes." << endl;
show_usage_and_exit();
}
vector<results_entry_t> results;
vector<results_entry_t> cubic_results;
uint16_t product_size = 0;
auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin();
for (auto it_default_sizes = inputfile_default_sizes.entries.begin();
it_default_sizes != inputfile_default_sizes.entries.end();
++it_default_sizes)
{
if (it_default_sizes->product_size == product_size) {
continue;
}
product_size = it_default_sizes->product_size;
while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() &&
it_all_pot_sizes->product_size != product_size)
{
++it_all_pot_sizes;
}
if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) {
break;
}
uint16_t best_pot_block_size = 0;
float best_pot_gflops = 0;
for (auto it = it_all_pot_sizes;
it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
++it)
{
if (it->gflops > best_pot_gflops) {
best_pot_gflops = it->gflops;
best_pot_block_size = it->pot_block_size;
}
}
results_entry_t entry;
entry.product_size = product_size;
entry.default_block_size = it_default_sizes->nonpot_block_size;
entry.best_pot_block_size = best_pot_block_size;
entry.default_gflops = it_default_sizes->gflops;
entry.best_pot_gflops = best_pot_gflops;
entry.default_efficiency = entry.default_gflops / entry.best_pot_gflops;
results.push_back(entry);
size_triple_t t(product_size);
if (t.k == t.m && t.m == t.n) {
cubic_results.push_back(entry);
}
}
cout << "All results:" << endl;
for (auto it = results.begin(); it != results.end(); ++it) {
cout << *it << endl;
}
cout << endl;
sort(results.begin(), results.end(), lower_efficiency);
const size_t n = min<size_t>(20, results.size());
cout << n << " worst results:" << endl;
for (size_t i = 0; i < n; i++) {
cout << results[i] << endl;
}
cout << endl;
cout << "cubic results:" << endl;
for (auto it = cubic_results.begin(); it != cubic_results.end(); ++it) {
cout << *it << endl;
}
cout << endl;
sort(cubic_results.begin(), cubic_results.end(), lower_efficiency);
cout.precision(2);
vector<float> a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f};
for (auto it = a.begin(); it != a.end(); ++it) {
size_t n = min(results.size() - 1, size_t(*it * results.size()));
cout << (100.0f * n / (results.size() - 1))
<< " % of product sizes have default efficiency <= "
<< 100.0f * results[n].default_efficiency << " %" << endl;
}
cout.precision(default_precision);
}
};
void show_usage_and_exit(int argc, char* argv[],
const vector<unique_ptr<action_t>>& available_actions)
{
cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
cerr << "available actions:" << endl;
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
cerr << " " << (*it)->invokation_name() << endl;
}
cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
exit(1);
}
int main(int argc, char* argv[])
{
cout.precision(default_precision);
cerr.precision(default_precision);
vector<unique_ptr<action_t>> available_actions;
available_actions.emplace_back(new partition_action_t);
available_actions.emplace_back(new evaluate_defaults_action_t);
vector<string> input_filenames;
action_t* action = nullptr;
if (argc < 2) {
show_usage_and_exit(argc, argv, available_actions);
}
for (int i = 1; i < argc; i++) {
bool arg_handled = false;
// Step 1. Try to match action invokation names.
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
if (!strcmp(argv[i], (*it)->invokation_name())) {
if (!action) {
action = it->get();
arg_handled = true;
break;
} else {
cerr << "can't specify more than one action!" << endl;
show_usage_and_exit(argc, argv, available_actions);
}
}
}
if (arg_handled) {
continue;
}
// Step 2. Try to match option names.
if (argv[i][0] == '-') {
if (!strcmp(argv[i], "--only-cubic-sizes")) {
only_cubic_sizes = true;
arg_handled = true;
}
if (!strcmp(argv[i], "--dump-tables")) {
dump_tables = true;
arg_handled = true;
}
if (!arg_handled) {
cerr << "Unrecognized option: " << argv[i] << endl;
show_usage_and_exit(argc, argv, available_actions);
}
}
if (arg_handled) {
continue;
}
// Step 3. Default to interpreting args as input filenames.
input_filenames.emplace_back(argv[i]);
}
if (dump_tables && only_cubic_sizes) {
cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl;
show_usage_and_exit(argc, argv, available_actions);
}
if (!action) {
show_usage_and_exit(argc, argv, available_actions);
}
action->run(input_filenames);
}

View File

@ -1,11 +1,23 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <iostream>
#include <cstdint>
#include <cstdlib>
#include <vector>
#include <fstream>
#include <memory>
#include <cstdio>
bool eigen_use_specific_block_size;
int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n;
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
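// These hooks are consumed by Eigen's blocking-size computation when
// EIGEN_TEST_SPECIFIC_BLOCKING_SIZES evaluates to true: they let this benchmark
// override the GEMM blocking sizes at run time. When eigen_use_specific_block_size
// is false, Eigen falls back to its default blocking heuristic.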
@ -31,11 +43,15 @@ const float min_accurate_time = 1e-2f;
// See --min-working-set-size command line parameter.
size_t min_working_set_size = 0;
float max_clock_speed = 0.0f;
// range of sizes that we will benchmark (in all 3 K,M,N dimensions)
const size_t maxsize = 2048;
const size_t minsize = 16;
typedef MatrixXf MatrixType;
typedef MatrixType::Scalar Scalar;
typedef internal::packet_traits<Scalar>::type Packet;
static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two");
static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two");
@ -82,16 +98,26 @@ struct benchmark_t
{
uint16_t compact_product_size;
uint16_t compact_block_size;
bool use_default_block_size;
float gflops;
benchmark_t()
: compact_product_size(0)
, compact_block_size(0)
, use_default_block_size(false)
, gflops(0)
{}
{
}
benchmark_t(size_t pk, size_t pm, size_t pn,
size_t bk, size_t bm, size_t bn)
: compact_product_size(compact_size_triple(pk, pm, pn))
, compact_block_size(compact_size_triple(bk, bm, bn))
, use_default_block_size(false)
, gflops(0)
{}
benchmark_t(size_t pk, size_t pm, size_t pn)
: compact_product_size(compact_size_triple(pk, pm, pn))
, compact_block_size(0)
, use_default_block_size(true)
, gflops(0)
{}
@ -100,10 +126,15 @@ struct benchmark_t
ostream& operator<<(ostream& s, const benchmark_t& b)
{
s << hex;
s << b.compact_product_size
<< " " << b.compact_block_size;
s << dec;
s << hex << b.compact_product_size << dec;
if (b.use_default_block_size) {
size_triple_t t(b.compact_product_size);
Index k = t.k, m = t.m, n = t.n;
internal::computeProductBlockingSizes<Scalar, Scalar>(k, m, n);
s << " default(" << k << ", " << m << ", " << n << ")";
} else {
s << " " << hex << b.compact_block_size << dec;
}
s << " " << b.gflops;
return s;
}
@ -121,19 +152,23 @@ bool operator<(const benchmark_t& b1, const benchmark_t& b2)
void benchmark_t::run()
{
// expand our compact benchmark params into proper triples
size_triple_t productsizes(compact_product_size);
size_triple_t blocksizes(compact_block_size);
// feed eigen with our custom blocking params
eigen_block_size_k = blocksizes.k;
eigen_block_size_m = blocksizes.m;
eigen_block_size_n = blocksizes.n;
if (use_default_block_size) {
eigen_use_specific_block_size = false;
} else {
// feed eigen with our custom blocking params
eigen_use_specific_block_size = true;
size_triple_t blocksizes(compact_block_size);
eigen_block_size_k = blocksizes.k;
eigen_block_size_m = blocksizes.m;
eigen_block_size_n = blocksizes.n;
}
// set up the matrix pool
const size_t combined_three_matrices_sizes =
sizeof(MatrixType::Scalar) *
sizeof(Scalar) *
(productsizes.k * productsizes.m +
productsizes.k * productsizes.n +
productsizes.m * productsizes.n);
@ -168,7 +203,7 @@ void benchmark_t::run()
double starttime = timer.getCpuTime();
for (int i = 0; i < iters_at_a_time; i++) {
dst[matrix_index] = lhs[matrix_index] * rhs[matrix_index];
dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
matrix_index++;
if (matrix_index == matrix_pool_size) {
matrix_index = 0;
@ -231,9 +266,23 @@ string type_name<double>()
return "double";
}
void show_usage_and_exit(const char *progname)
struct action_t
{
cerr << "usage: " << progname << " [--min-working-set-size=N]" << endl << endl;
virtual const char* invokation_name() const { abort(); return nullptr; }
virtual void run() const { abort(); }
virtual ~action_t() {}
};
void show_usage_and_exit(int /*argc*/, char* argv[],
const vector<unique_ptr<action_t>>& available_actions)
{
cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
cerr << "available actions:" << endl << endl;
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
cerr << " " << (*it)->invokation_name() << endl;
}
cerr << endl;
cerr << "options:" << endl << endl;
cerr << " --min-working-set-size=N:" << endl;
cerr << " Set the minimum working set size to N bytes." << endl;
cerr << " This is rounded up as needed to a multiple of matrix size." << endl;
@ -244,93 +293,254 @@ void show_usage_and_exit(const char *progname)
cerr << " avoid warm caches." << endl;
exit(1);
}
int main(int argc, char* argv[])
float measure_clock_speed()
{
for (int i = 1; i < argc; i++) {
if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
const char* equals_sign = strchr(argv[i], '=');
min_working_set_size = strtoul(equals_sign+1, nullptr, 10);
} else {
cerr << "unrecognized option: " << argv[i] << endl << endl;
show_usage_and_exit(argv[0]);
}
cerr << "Measuring clock speed... \r" << flush;
vector<float> all_gflops;
for (int i = 0; i < 8; i++) {
benchmark_t b(1024, 1024, 1024);
b.run();
all_gflops.push_back(b.gflops);
}
cout.precision(4);
sort(all_gflops.begin(), all_gflops.end());
float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5];
print_cpuinfo();
// multiply by an arbitrary constant to discourage doing anything with the
// returned values besides just comparing them with each other.
float result = stable_estimate * 123.456f;
cout << "benchmark parameters:" << endl;
cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl;
cout << "scalar type: " << type_name<MatrixType::Scalar>() << endl;
cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
cout << "minsize = " << minsize << endl;
cout << "maxsize = " << maxsize << endl;
cout << "measurement_repetitions = " << measurement_repetitions << endl;
cout << "min_accurate_time = " << min_accurate_time << endl;
cout << "min_working_set_size = " << min_working_set_size;
if (min_working_set_size == 0) {
cout << " (try to outsize caches)";
return result;
}
struct human_duration_t
{
int seconds;
human_duration_t(int s) : seconds(s) {}
};
ostream& operator<<(ostream& s, const human_duration_t& d)
{
int remainder = d.seconds;
if (remainder > 3600) {
int hours = remainder / 3600;
s << hours << " h ";
remainder -= hours * 3600;
}
cout << endl << endl;
if (remainder > 60) {
int minutes = remainder / 60;
s << minutes << " min ";
remainder -= minutes * 60;
}
if (d.seconds < 600) {
s << remainder << " s";
}
return s;
}
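// For example, human_duration_t(3723) prints "1 h 2 min"; the leftover seconds
// are intentionally omitted for durations of ten minutes or more.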
const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
// assemble the array of benchmarks without running them at first
vector<benchmark_t> benchmarks;
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) {
for (size_t mblock = minsize; mblock <= msize; mblock *= 2) {
for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) {
benchmark_t b(ksize, msize, nsize, kblock, mblock, nblock);
benchmarks.push_back(b);
}
}
void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run)
{
FILE* file = fopen(filename, "w");
if (!file) {
cerr << "Could not open file " << filename << " for writing." << endl;
cerr << "Do you have write permissions on the current working directory?" << endl;
exit(1);
}
size_t benchmarks_vector_size = benchmarks.size();
fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file);
fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file);
fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file);
fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file);
fclose(file);
}
bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run)
{
FILE* file = fopen(filename, "r");
if (!file) {
return false;
}
if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) {
return false;
}
size_t benchmarks_vector_size = 0;
if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) {
return false;
}
if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) {
return false;
}
benchmarks.resize(benchmarks_vector_size);
if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) {
return false;
}
unlink(filename);
return true;
}
void try_run_some_benchmarks(
vector<benchmark_t>& benchmarks,
double time_start,
size_t& first_benchmark_to_run)
{
if (first_benchmark_to_run == benchmarks.size()) {
return;
}
double time_last_progress_update = 0;
double time_last_clock_speed_measurement = 0;
double time_now = 0;
size_t benchmark_index = first_benchmark_to_run;
while (true) {
float ratio_done = float(benchmark_index) / benchmarks.size();
time_now = timer.getRealTime();
// We check clock speed every minute and at the end.
if (benchmark_index == benchmarks.size() ||
time_now > time_last_clock_speed_measurement + 60.0f)
{
time_last_clock_speed_measurement = time_now;
// Ensure that clock speed is as expected
float current_clock_speed = measure_clock_speed();
// The tolerance needs to be smaller than the relative difference between
// clock speeds that a device could operate under.
// It seems unlikely that a device would be throttling clock speeds by
// amounts smaller than 2%.
// With a value of 1%, I was getting within noise on a Sandy Bridge.
const float clock_speed_tolerance = 0.02f;
if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) {
// Clock speed is now higher than we previously measured.
// Either our initial measurement was inaccurate, which won't happen
// too many times as we are keeping the best clock speed value
// and allowing some tolerance; or something really weird happened,
// which invalidates all benchmark results collected so far.
// Either way, we better restart all over again now.
if (benchmark_index) {
cerr << "Restarting at " << 100.0f * ratio_done
<< " % because clock speed increased. " << endl;
}
max_clock_speed = current_clock_speed;
first_benchmark_to_run = 0;
return;
}
bool rerun_last_tests = false;
if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
cerr << "Measurements completed so far: "
<< 100.0f * ratio_done
<< " % " << endl;
cerr << "Clock speed seems to be only "
<< current_clock_speed/max_clock_speed
<< " times what it used to be." << endl;
unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
if (seconds_to_sleep_if_lower_clock_speed > 32) {
cerr << "Sleeping longer probably won't make a difference." << endl;
cerr << "Serializing benchmarks to " << session_filename << endl;
serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
cerr << "Now restart this benchmark, and it should pick up where we left." << endl;
exit(2);
}
rerun_last_tests = true;
cerr << "Sleeping "
<< seconds_to_sleep_if_lower_clock_speed
<< " s... \r" << endl;
sleep(seconds_to_sleep_if_lower_clock_speed);
current_clock_speed = measure_clock_speed();
seconds_to_sleep_if_lower_clock_speed *= 2;
}
}
if (rerun_last_tests) {
cerr << "Redoing the last "
<< 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
<< " % because clock speed had been low. " << endl;
return;
}
// nothing wrong with the clock speed so far, so there won't be a need to rerun
// benchmarks run so far in case we later encounter a lower clock speed.
first_benchmark_to_run = benchmark_index;
}
}
// randomly shuffling benchmarks allows us to get accurate enough progress info,
// as now the cheap/expensive benchmarks are randomly mixed so they average out.
random_shuffle(benchmarks.begin(), benchmarks.end());
if (benchmark_index == benchmarks.size()) {
// We're done!
first_benchmark_to_run = benchmarks.size();
// Erase progress info
cerr << " " << endl;
return;
}
// timings here are only used to display progress info.
// Whence the use of real time.
double time_start = timer.getRealTime();
double time_last_progress_update = time_start;
for (size_t i = 0; i < benchmarks.size(); i++) {
// Display progress info on stderr
double time_now = timer.getRealTime();
if (time_now > time_last_progress_update + 1.0f) {
time_last_progress_update = time_now;
float ratio_done = float(i) / benchmarks.size();
cerr.precision(3);
cerr << "Measurements... " << 100.0f * ratio_done
<< " %";
if (i > 10) {
cerr << ", ETA ";
float eta = float(time_now - time_start) * (1.0f - ratio_done) / ratio_done;
if (eta > 3600)
cerr << eta/3600 << " hours";
else if (eta > 60)
cerr << eta/60 << " minutes";
else cerr << eta << " seconds";
}
cerr << " \r" << flush;
<< " %, ETA "
<< human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
<< " \r" << flush;
}
// This is where we actually run a benchmark!
benchmarks[i].run();
benchmarks[benchmark_index].run();
benchmark_index++;
}
}
void run_benchmarks(vector<benchmark_t>& benchmarks)
{
size_t first_benchmark_to_run;
vector<benchmark_t> deserialized_benchmarks;
bool use_deserialized_benchmarks = false;
if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
cerr << "Found serialized session with "
<< 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
<< " % already done" << endl;
if (deserialized_benchmarks.size() == benchmarks.size() &&
first_benchmark_to_run > 0 &&
first_benchmark_to_run < benchmarks.size())
{
use_deserialized_benchmarks = true;
}
}
// Erase progress info
cerr << " " << endl;
if (use_deserialized_benchmarks) {
benchmarks = deserialized_benchmarks;
} else {
// not using deserialized benchmarks, starting from scratch
first_benchmark_to_run = 0;
// Randomly shuffling benchmarks allows us to get accurate enough progress info,
// as now the cheap/expensive benchmarks are randomly mixed so they average out.
// It also means that if data is corrupted for some time span, the odds are that
// not all repetitions of a given benchmark will be corrupted.
random_shuffle(benchmarks.begin(), benchmarks.end());
}
for (int i = 0; i < 4; i++) {
max_clock_speed = max(max_clock_speed, measure_clock_speed());
}
double time_start = 0.0;
while (first_benchmark_to_run < benchmarks.size()) {
if (first_benchmark_to_run == 0) {
time_start = timer.getRealTime();
}
try_run_some_benchmarks(benchmarks,
time_start,
first_benchmark_to_run);
}
// Sort timings by increasing benchmark parameters, and decreasing gflops.
// The latter is very important. It means that we can ignore all but the first
@ -348,9 +558,120 @@ int main(int argc, char* argv[])
}
}
// Output data.
cout << "BEGIN MEASUREMENTS" << endl;
for (auto it = best_benchmarks.begin(); it != best_benchmarks.end(); ++it) {
cout << *it << endl;
}
// keep and return only the best benchmarks
benchmarks = best_benchmarks;
}
struct measure_all_pot_sizes_action_t : action_t
{
virtual const char* invokation_name() const { return "all-pot-sizes"; }
virtual void run() const
{
vector<benchmark_t> benchmarks;
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) {
for (size_t mblock = minsize; mblock <= msize; mblock *= 2) {
for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) {
benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock);
}
}
}
}
}
}
}
run_benchmarks(benchmarks);
cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl;
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
cout << *it << endl;
}
}
};
struct measure_default_sizes_action_t : action_t
{
virtual const char* invokation_name() const { return "default-sizes"; }
virtual void run() const
{
vector<benchmark_t> benchmarks;
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
benchmarks.emplace_back(ksize, msize, nsize);
}
}
}
}
run_benchmarks(benchmarks);
cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl;
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
cout << *it << endl;
}
}
};
int main(int argc, char* argv[])
{
double time_start = timer.getRealTime();
cout.precision(4);
cerr.precision(4);
vector<unique_ptr<action_t>> available_actions;
available_actions.emplace_back(new measure_all_pot_sizes_action_t);
available_actions.emplace_back(new measure_default_sizes_action_t);
auto action = available_actions.end();
if (argc <= 1) {
show_usage_and_exit(argc, argv, available_actions);
}
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
if (!strcmp(argv[1], (*it)->invokation_name())) {
action = it;
break;
}
}
if (action == available_actions.end()) {
show_usage_and_exit(argc, argv, available_actions);
}
for (int i = 2; i < argc; i++) {
if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
const char* equals_sign = strchr(argv[i], '=');
min_working_set_size = strtoul(equals_sign+1, nullptr, 10);
} else {
cerr << "unrecognized option: " << argv[i] << endl << endl;
show_usage_and_exit(argc, argv, available_actions);
}
}
print_cpuinfo();
cout << "benchmark parameters:" << endl;
cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl;
cout << "scalar type: " << type_name<Scalar>() << endl;
cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
cout << "minsize = " << minsize << endl;
cout << "maxsize = " << maxsize << endl;
cout << "measurement_repetitions = " << measurement_repetitions << endl;
cout << "min_accurate_time = " << min_accurate_time << endl;
cout << "min_working_set_size = " << min_working_set_size;
if (min_working_set_size == 0) {
cout << " (try to outsize caches)";
}
cout << endl << endl;
(*action)->run();
double time_end = timer.getRealTime();
cerr << "Finished in " << human_duration_t(time_end - time_start) << endl;
}

View File

@ -1,39 +1,45 @@
3.0.1
3.1.1
3.2.0
#3.0.1
#3.1.1
#3.2.0
3.2.4
5745:37f59e65eb6c
5891:d8652709345d
5893:24b4dc92c6d3
5895:997c2ef9fc8b
5904:e1eafd14eaa1
5908:f8ee3c721251
5921:ca808bb456b0
5927:8b1001f9e3ac
5937:5a4ca1ad8c53
5949:f3488f4e45b2
5969:e09031dccfd9
5992:4a429f5e0483
#5745:37f59e65eb6c
5891:d8652709345d # introduce AVX
#5893:24b4dc92c6d3 # merge
5895:997c2ef9fc8b # introduce FMA
#5904:e1eafd14eaa1 # complex and AVX
5908:f8ee3c721251 # improve packing with ptranspose
#5921:ca808bb456b0 # merge
#5927:8b1001f9e3ac
5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks
#5949:f3488f4e45b2 # merge
#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec
#5992:4a429f5e0483 # merge
before-evaluators
6334:f6a45e5b8b7c
6639:c9121c60b5c7
6655:06f163b5221f
6677:700e023044e7 # FMA has been wrongly disabled
6681:11d31dafb0e3
6699:5e6e8e10aad1 # merge default to tensors
6726:ff2d2388e7b9 # merge default to tensors
6742:0cbd6195e829 # merge default to tensors
6747:853d2bafeb8f # Generalized the gebp apis
#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products
#6639:c9121c60b5c7
#6655:06f163b5221f # Properly detect FMA support on ARM
#6677:700e023044e7 # FMA has been wrongly disabled
#6681:11d31dafb0e3
#6699:5e6e8e10aad1 # merge default to tensors
#6726:ff2d2388e7b9 # merge default to tensors
#6742:0cbd6195e829 # merge default to tensors
#6747:853d2bafeb8f # Generalized the gebp apis
6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation
6781:9cc5a931b2c6 # generalized gemv
6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
6844:039efd86b75c # merge tensor
#6781:9cc5a931b2c6 # generalized gemv
#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
#6844:039efd86b75c # merge tensor
6845:7333ed40c6ef # change prefetching in gebp
6856:b5be5e10eb7f # merge index conversion
6893:c3a64aba7c70 # clean blocking size computation
6898:6fb31ebe6492 # rotating kernel for ARM
#6856:b5be5e10eb7f # merge index conversion
#6893:c3a64aba7c70 # clean blocking size computation
#6898:6fb31ebe6492 # rotating kernel for ARM
6899:877facace746 # rotating kernel for ARM only
6904:c250623ae9fa # result_of
#6904:c250623ae9fa # result_of
6921:915f1b1fc158 # fix prefetching change for ARM
6923:9ff25f6dacc6 # prefetching
6933:52572e60b5d3 # blocking size strategy
6933:52572e60b5d3 # blocking size strategy
6937:c8c042f286b2 # avoid redundant pack_rhs
6981:7e5d6f78da59 # dynamic loop swapping
6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache
6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1.
7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5)

View File

@ -6,6 +6,7 @@
# Options:
# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
# -s : recompute selected changesets only and keep bests
if echo "$*" | grep '\-up' > /dev/null; then
@ -14,14 +15,30 @@ else
update=false
fi
if [ $update == true ]; then
if echo "$*" | grep '\-s' > /dev/null; then
selected=true
else
selected=false
fi
global_args="$*"
if [ $selected == true ]; then
echo "Recompute selected changesets only and keep bests"
elif [ $update == true ]; then
echo "(Re-)Compute all changesets and keep bests"
else
echo "Skip previously computed changesets"
fi
if [ ! -d "eigen_src" ]; then
hg clone https://bitbucket.org/eigen/eigen eigen_src
else
cd eigen_src
hg pull -u
cd ..
fi
if [ ! -z "$CXX" ]; then
@ -61,17 +78,31 @@ function test_current
scalar=$2
name=$3
prev=`grep $rev "$name.backup" | cut -c 14-`
prev=""
if [ -e "$name.backup" ]; then
prev=`grep $rev "$name.backup" | cut -c 14-`
fi
res=$prev
count_rev=`echo $prev | wc -w`
count_ref=`cat "settings.txt" | wc -l`
if [ $update == true ] || [ $count_rev != $count_ref ]; then
if echo "$global_args" | grep "$rev" > /dev/null; then
rev_found=true
else
rev_found=false
fi
# echo $update and $selected and $rev_found because $rev and "$global_args"
# echo $count_rev and $count_ref
if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then
if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src gemm.cpp -DSCALAR=$scalar -o $name; then
curr=`./$name`
echo merge $prev
echo with $curr
if [ $count_rev == $count_ref ]; then
echo "merge previous $prev"
echo "with new $curr"
else
echo "got $curr"
fi
res=`merge "$curr" "$prev"`
echo $res
# echo $res
echo "$rev $res" >> $name.out
else
echo "Compilation failed, skip rev $rev"
@ -86,12 +117,12 @@ make_backup $PREFIX"sgemm"
make_backup $PREFIX"dgemm"
make_backup $PREFIX"cgemm"
cut -f1 -d"#" < changesets.txt | while read rev
cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev
do
if [ ! -z "$rev" ]; then
echo "Testing rev $rev"
cd eigen_src
hg up -C $rev
hg up -C $rev > /dev/null
actual_rev=`hg identify | cut -f1 -d' '`
cd ..

View File

@ -1,5 +1,6 @@
8 8 8
9 9 9
24 24 24
239 239 239
240 240 240
2400 24 24
@ -8,4 +9,7 @@
24 2400 2400
2400 24 2400
2400 2400 24
2400 2400 64
4800 23 160
23 4800 160
2400 2400 2400

View File

@ -1,7 +1,7 @@
#include <stdio.h>
#if (defined __GNUC__) && (!defined __MINGW32__)
#if (defined __GNUC__) && (!defined __MINGW32__) && (!defined __CYGWIN__)
#define EIGEN_WEAK_LINKING __attribute__ ((weak))
#else
#define EIGEN_WEAK_LINKING

View File

@ -502,6 +502,10 @@ macro(ei_set_build_string)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit)
endif()
if(EIGEN_TEST_CXX11)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11)
endif()
if(EIGEN_BUILD_STRING_SUFFIX)
set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX})
endif()

View File

@ -157,6 +157,67 @@ inline adouble abs2(const adouble& x) { return x*x; }
#endif // ADOLCSUPPORT_H
\endcode
This other example adds support for the \c mpq_class type from <a href="https://gmplib.org/">GMP</a>. It shows in particular how to change the way Eigen picks the best pivot during LU factorization. It selects the coefficient with the highest score, where the score is by default the absolute value of a number, but we can define a different score, for instance to prefer pivots with a more compact representation (this is an example, not a recommendation). Note that the scores should always be non-negative and only zero is allowed to have a score of zero. Also, this can interact badly with thresholds for inexact scalar types.
\code
#include <gmpxx.h>
#include <Eigen/Core>
#include <boost/operators.hpp>
namespace Eigen {
template<class> struct NumTraits;
template<> struct NumTraits<mpq_class>
{
typedef mpq_class Real;
typedef mpq_class NonInteger;
typedef mpq_class Nested;
static inline Real epsilon() { return 0; }
static inline Real dummy_precision() { return 0; }
enum {
IsInteger = 0,
IsSigned = 1,
IsComplex = 0,
RequireInitialization = 1,
ReadCost = 6,
AddCost = 150,
MulCost = 100
};
};
namespace internal {
template<>
struct significant_decimals_impl<mpq_class>
{
// Infinite precision when printing
static inline int run() { return 0; }
};
template<> struct scalar_score_coeff_op<mpq_class> {
struct result_type : boost::totally_ordered1<result_type> {
std::size_t len;
result_type(int i = 0) : len(i) {} // Eigen uses Score(0) and Score()
result_type(mpq_class const& q) :
len(mpz_size(q.get_num_mpz_t())+
mpz_size(q.get_den_mpz_t())-1) {}
friend bool operator<(result_type x, result_type y) {
// 0 is the worst possible pivot
if (x.len == 0) return y.len > 0;
if (y.len == 0) return false;
// Prefer a pivot with a small representation
return x.len > y.len;
}
friend bool operator==(result_type x, result_type y) {
// Only used to test if the score is 0
return x.len == y.len;
}
};
result_type operator()(mpq_class const& x) const { return x; }
};
}
}
\endcode
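For illustration only (this snippet is not part of the original example and the values are arbitrary), the resulting scalar type can then be fed to a full-pivoting LU factorization like any other scalar; the custom score defined above drives the pivot selection:
\code
#include <Eigen/LU>
// ... NumTraits and scalar_score_coeff_op specializations as above ...
typedef Eigen::Matrix<mpq_class,Eigen::Dynamic,Eigen::Dynamic> MatrixXq;
MatrixXq A(2,2), b(2,1);
A(0,0) = mpq_class(1,3); A(0,1) = mpq_class(2,5);
A(1,0) = mpq_class(3,7); A(1,1) = mpq_class(4,9);
b(0,0) = 1; b(1,0) = 2;
Eigen::FullPivLU<MatrixXq> lu(A); // pivoting uses the compact-representation score
MatrixXq x = lu.solve(b);         // exact rational solution
\endcode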
\sa \ref TopicPreprocessorDirectives

View File

@ -21,6 +21,9 @@ They are summarized in the following table:
<tr><td>ConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Classic iterative CG</td><td>SPD</td><td>Preconditioning</td>
<td>built-in, MPL2</td>
<td>Recommended for large symmetric problems (e.g., 3D Poisson eq.)</td></tr>
<tr><td>LSCG</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>CG for rectangular least-squares problems</td><td>Rectangular</td><td>Preconditioning</td>
<td>built-in, MPL2</td>
<td>Solve for min |Ax-b|^2 without forming A'A</td></tr>
<tr><td>BiCGSTAB</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Iterative stabilized bi-conjugate gradient</td><td>Square</td><td>Preconditioning</td>
<td>built-in, MPL2</td>
<td>To speed up the convergence, try it with the \ref IncompleteLUT preconditioner.</td></tr>

View File

@ -47,6 +47,18 @@ ei_add_failtest("sparse_ref_3")
ei_add_failtest("sparse_ref_4")
ei_add_failtest("sparse_ref_5")
ei_add_failtest("partialpivlu_int")
ei_add_failtest("fullpivlu_int")
ei_add_failtest("llt_int")
ei_add_failtest("ldlt_int")
ei_add_failtest("qr_int")
ei_add_failtest("colpivqr_int")
ei_add_failtest("fullpivqr_int")
ei_add_failtest("jacobisvd_int")
ei_add_failtest("bdcsvd_int")
ei_add_failtest("eigensolver_int")
ei_add_failtest("eigensolver_cplx")
if (EIGEN_FAILTEST_FAILURE_COUNT)
message(FATAL_ERROR
"${EIGEN_FAILTEST_FAILURE_COUNT} out of ${EIGEN_FAILTEST_COUNT} failtests FAILED. "

14
failtest/bdcsvd_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/SVD"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
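// Note: the failtest driver builds each of these files twice, once with
// EIGEN_SHOULD_FAIL_TO_BUILD defined (SCALAR is then a type the decomposition is
// expected to reject at compile time, so the build must fail) and once without
// (the build must succeed).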
using namespace Eigen;
int main()
{
BDCSVD<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/colpivqr_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/QR"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
ColPivHouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/Eigenvalues"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR std::complex<double>
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
EigenSolver<Matrix<SCALAR,Dynamic,Dynamic> > eig(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/Eigenvalues"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
EigenSolver<Matrix<SCALAR,Dynamic,Dynamic> > eig(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/LU"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
FullPivLU<Matrix<SCALAR,Dynamic,Dynamic> > lu(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/QR"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
FullPivHouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/SVD"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
JacobiSVD<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/ldlt_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/Cholesky"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
LDLT<Matrix<SCALAR,Dynamic,Dynamic> > ldlt(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/llt_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/Cholesky"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
LLT<Matrix<SCALAR,Dynamic,Dynamic> > llt(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/LU"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
PartialPivLU<Matrix<SCALAR,Dynamic,Dynamic> > lu(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/qr_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/QR"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
HouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -14,9 +14,9 @@ targets_to_make=`echo "$TESTSLIST" | egrep "$1" | xargs echo`
if [ -n "${EIGEN_MAKE_ARGS:+x}" ]
then
make $targets_to_make ${EIGEN_MAKE_ARGS}
@CMAKE_MAKE_PROGRAM@ $targets_to_make ${EIGEN_MAKE_ARGS}
else
make $targets_to_make
@CMAKE_MAKE_PROGRAM@ $targets_to_make @EIGEN_TEST_BUILD_FLAGS@
fi
exit $?

View File

@ -139,6 +139,7 @@ endif(TEST_LIB)
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
add_custom_target(BuildOfficial)
ei_add_test(rand)
ei_add_test(meta)
ei_add_test(sizeof)
ei_add_test(dynalloc)
@ -226,6 +227,7 @@ ei_add_test(stdvector_overload)
ei_add_test(stdlist)
ei_add_test(stddeque)
ei_add_test(sparse_basic)
ei_add_test(sparse_block)
ei_add_test(sparse_vector)
ei_add_test(sparse_product)
ei_add_test(sparse_ref)
@ -234,6 +236,7 @@ ei_add_test(sparse_permutations)
ei_add_test(simplicial_cholesky)
ei_add_test(conjugate_gradient)
ei_add_test(bicgstab)
ei_add_test(lscg)
ei_add_test(sparselu)
ei_add_test(sparseqr)
ei_add_test(umeyama)

View File

@ -10,11 +10,11 @@
#include "sparse_solver.h"
#include <Eigen/IterativeLinearSolvers>
template<typename T> void test_bicgstab_T()
template<typename T, typename I> void test_bicgstab_T()
{
BiCGSTAB<SparseMatrix<T>, DiagonalPreconditioner<T> > bicgstab_colmajor_diag;
BiCGSTAB<SparseMatrix<T>, IdentityPreconditioner > bicgstab_colmajor_I;
BiCGSTAB<SparseMatrix<T>, IncompleteLUT<T> > bicgstab_colmajor_ilut;
BiCGSTAB<SparseMatrix<T,0,I>, DiagonalPreconditioner<T> > bicgstab_colmajor_diag;
BiCGSTAB<SparseMatrix<T,0,I>, IdentityPreconditioner > bicgstab_colmajor_I;
BiCGSTAB<SparseMatrix<T,0,I>, IncompleteLUT<T,I> > bicgstab_colmajor_ilut;
//BiCGSTAB<SparseMatrix<T>, SSORPreconditioner<T> > bicgstab_colmajor_ssor;
CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_diag) );
@ -25,6 +25,7 @@ template<typename T> void test_bicgstab_T()
void test_bicgstab()
{
CALL_SUBTEST_1(test_bicgstab_T<double>());
CALL_SUBTEST_2(test_bicgstab_T<std::complex<double> >());
CALL_SUBTEST_1((test_bicgstab_T<double,int>()) );
CALL_SUBTEST_2((test_bicgstab_T<std::complex<double>, int>()));
CALL_SUBTEST_3((test_bicgstab_T<double,long int>()));
}

View File

@ -10,13 +10,14 @@
#include "sparse_solver.h"
#include <Eigen/IterativeLinearSolvers>
template<typename T> void test_conjugate_gradient_T()
template<typename T, typename I> void test_conjugate_gradient_T()
{
ConjugateGradient<SparseMatrix<T>, Lower > cg_colmajor_lower_diag;
ConjugateGradient<SparseMatrix<T>, Upper > cg_colmajor_upper_diag;
ConjugateGradient<SparseMatrix<T>, Lower|Upper> cg_colmajor_loup_diag;
ConjugateGradient<SparseMatrix<T>, Lower, IdentityPreconditioner> cg_colmajor_lower_I;
ConjugateGradient<SparseMatrix<T>, Upper, IdentityPreconditioner> cg_colmajor_upper_I;
typedef SparseMatrix<T,0,I> SparseMatrixType;
ConjugateGradient<SparseMatrixType, Lower > cg_colmajor_lower_diag;
ConjugateGradient<SparseMatrixType, Upper > cg_colmajor_upper_diag;
ConjugateGradient<SparseMatrixType, Lower|Upper> cg_colmajor_loup_diag;
ConjugateGradient<SparseMatrixType, Lower, IdentityPreconditioner> cg_colmajor_lower_I;
ConjugateGradient<SparseMatrixType, Upper, IdentityPreconditioner> cg_colmajor_upper_I;
CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_lower_diag) );
CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_upper_diag) );
@ -27,6 +28,7 @@ template<typename T> void test_conjugate_gradient_T()
void test_conjugate_gradient()
{
CALL_SUBTEST_1(test_conjugate_gradient_T<double>());
CALL_SUBTEST_2(test_conjugate_gradient_T<std::complex<double> >());
CALL_SUBTEST_1(( test_conjugate_gradient_T<double,int>() ));
CALL_SUBTEST_2(( test_conjugate_gradient_T<std::complex<double>, int>() ));
CALL_SUBTEST_3(( test_conjugate_gradient_T<double,long int>() ));
}

29
test/lscg.cpp Normal file
View File

@ -0,0 +1,29 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "sparse_solver.h"
#include <Eigen/IterativeLinearSolvers>
template<typename T> void test_lscg_T()
{
LeastSquaresConjugateGradient<SparseMatrix<T> > lscg_colmajor_diag;
LeastSquaresConjugateGradient<SparseMatrix<T>, IdentityPreconditioner> lscg_colmajor_I;
CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_diag) );
CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_I) );
CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_diag) );
CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_I) );
}
void test_lscg()
{
CALL_SUBTEST_1(test_lscg_T<double>());
CALL_SUBTEST_2(test_lscg_T<std::complex<double> >());
}

View File

@ -42,13 +42,19 @@
#include <deque>
#include <queue>
#include <list>
#if __cplusplus >= 201103L
#include <random>
#ifdef EIGEN_USE_THREADS
#include <future>
#endif
#endif
// To test that all calls from Eigen code to std::min() and std::max() are
// protected by parentheses against macro expansion, the min()/max() macros
// are defined here and any not-parenthesized min/max call will cause a
// compiler error.
//#define min(A,B) please_protect_your_min_with_parentheses
//#define max(A,B) please_protect_your_max_with_parentheses
#define min(A,B) please_protect_your_min_with_parentheses
#define max(A,B) please_protect_your_max_with_parentheses
#define FORBIDDEN_IDENTIFIER (this_identifier_is_forbidden_to_avoid_clashes) this_identifier_is_forbidden_to_avoid_clashes
// B0 is defined in POSIX header termios.h

View File

@ -109,8 +109,33 @@ void mat_mat_scalar_scalar_product()
double det = 6.0, wt = 0.5;
VERIFY_IS_APPROX(dNdxy.transpose()*dNdxy*det*wt, det*wt*dNdxy.transpose()*dNdxy);
}
template <typename MatrixType>
void zero_sized_objects(const MatrixType& m)
{
Index rows = m.rows();
Index cols = m.cols();
void zero_sized_objects()
{
MatrixType res, a(rows,0), b(0,cols);
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(rows,cols) );
VERIFY_IS_APPROX( (res=a*a.transpose()), MatrixType::Zero(rows,rows) );
VERIFY_IS_APPROX( (res=b.transpose()*b), MatrixType::Zero(cols,cols) );
VERIFY_IS_APPROX( (res=b.transpose()*a.transpose()), MatrixType::Zero(cols,rows) );
}
{
MatrixType res, a(rows,cols), b(cols,0);
res = a*b;
VERIFY(res.rows()==rows && res.cols()==0);
b.resize(0,rows);
res = b*a;
VERIFY(res.rows()==0 && res.cols()==cols);
}
}
void bug_127()
{
// Bug 127
//
@ -171,7 +196,8 @@ void test_product_extra()
CALL_SUBTEST_2( mat_mat_scalar_scalar_product() );
CALL_SUBTEST_3( product_extra(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
}
CALL_SUBTEST_5( zero_sized_objects() );
CALL_SUBTEST_5( bug_127() );
CALL_SUBTEST_6( unaligned_objects() );
}

88
test/rand.cpp Normal file
View File

@ -0,0 +1,88 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
template<typename Scalar> Scalar check_in_range(Scalar x, Scalar y)
{
Scalar r = internal::random<Scalar>(x,y);
VERIFY(r>=x);
if(y>=x)
{
VERIFY(r<=y);
}
return r;
}
template<typename Scalar> void check_all_in_range(Scalar x, Scalar y)
{
Array<int,1,Dynamic> mask(y-x+1);
mask.fill(0);
long n = (y-x+1)*32;
for(long k=0; k<n; ++k)
{
mask( check_in_range(x,y)-x )++;
}
VERIFY( (mask>0).all() );
}
void test_rand()
{
long long_ref = NumTraits<long>::highest()/10;
char char_offset = (std::min)(g_repeat,64);
char short_offset = (std::min)(g_repeat,16000);
for(int i = 0; i < g_repeat*10; i++) {
CALL_SUBTEST(check_in_range<float>(10,11));
CALL_SUBTEST(check_in_range<float>(1.24234523,1.24234523));
CALL_SUBTEST(check_in_range<float>(-1,1));
CALL_SUBTEST(check_in_range<float>(-1432.2352,-1432.2352));
CALL_SUBTEST(check_in_range<double>(10,11));
CALL_SUBTEST(check_in_range<double>(1.24234523,1.24234523));
CALL_SUBTEST(check_in_range<double>(-1,1));
CALL_SUBTEST(check_in_range<double>(-1432.2352,-1432.2352));
CALL_SUBTEST(check_in_range<int>(0,-1));
CALL_SUBTEST(check_in_range<short>(0,-1));
CALL_SUBTEST(check_in_range<long>(0,-1));
CALL_SUBTEST(check_in_range<int>(-673456,673456));
CALL_SUBTEST(check_in_range<short>(-24345,24345));
CALL_SUBTEST(check_in_range<long>(-long_ref,long_ref));
}
CALL_SUBTEST(check_all_in_range<char>(11,11));
CALL_SUBTEST(check_all_in_range<char>(11,11+char_offset));
CALL_SUBTEST(check_all_in_range<char>(-5,5));
CALL_SUBTEST(check_all_in_range<char>(-11-char_offset,-11));
CALL_SUBTEST(check_all_in_range<char>(-126,-126+char_offset));
CALL_SUBTEST(check_all_in_range<char>(126-char_offset,126));
CALL_SUBTEST(check_all_in_range<char>(-126,126));
CALL_SUBTEST(check_all_in_range<short>(11,11));
CALL_SUBTEST(check_all_in_range<short>(11,11+short_offset));
CALL_SUBTEST(check_all_in_range<short>(-5,5));
CALL_SUBTEST(check_all_in_range<short>(-11-short_offset,-11));
CALL_SUBTEST(check_all_in_range<short>(-24345,-24345+short_offset));
CALL_SUBTEST(check_all_in_range<short>(24345,24345+short_offset));
CALL_SUBTEST(check_all_in_range<int>(11,11));
CALL_SUBTEST(check_all_in_range<int>(11,11+g_repeat));
CALL_SUBTEST(check_all_in_range<int>(-5,5));
CALL_SUBTEST(check_all_in_range<int>(-11-g_repeat,-11));
CALL_SUBTEST(check_all_in_range<int>(-673456,-673456+g_repeat));
CALL_SUBTEST(check_all_in_range<int>(673456,673456+g_repeat));
CALL_SUBTEST(check_all_in_range<long>(11,11));
CALL_SUBTEST(check_all_in_range<long>(11,11+g_repeat));
CALL_SUBTEST(check_all_in_range<long>(-5,5));
CALL_SUBTEST(check_all_in_range<long>(-11-g_repeat,-11));
CALL_SUBTEST(check_all_in_range<long>(-long_ref,-long_ref+g_repeat));
CALL_SUBTEST(check_all_in_range<long>( long_ref, long_ref+g_repeat));
}

View File

@ -228,6 +228,28 @@ void call_ref()
VERIFY_EVALUATION_COUNT( call_ref_7(c,c), 0);
}
typedef Matrix<double,Dynamic,Dynamic,RowMajor> RowMatrixXd;
int test_ref_overload_fun1(Ref<MatrixXd> ) { return 1; }
int test_ref_overload_fun1(Ref<RowMatrixXd> ) { return 2; }
int test_ref_overload_fun1(Ref<MatrixXf> ) { return 3; }
int test_ref_overload_fun2(Ref<const MatrixXd> ) { return 4; }
int test_ref_overload_fun2(Ref<const MatrixXf> ) { return 5; }
// See also bug 969
void test_ref_overloads()
{
MatrixXd Ad, Bd;
RowMatrixXd rAd, rBd;
VERIFY( test_ref_overload_fun1(Ad)==1 );
VERIFY( test_ref_overload_fun1(rAd)==2 );
MatrixXf Af, Bf;
VERIFY( test_ref_overload_fun2(Ad)==4 );
VERIFY( test_ref_overload_fun2(Ad+Bd)==4 );
VERIFY( test_ref_overload_fun2(Af+Bf)==5 );
}
void test_ref()
{
for(int i = 0; i < g_repeat; i++) {
@ -248,4 +270,6 @@ void test_ref()
CALL_SUBTEST_5( ref_matrix(MatrixXi(internal::random<int>(1,10),internal::random<int>(1,10))) );
CALL_SUBTEST_6( call_ref() );
}
CALL_SUBTEST_7( test_ref_overloads() );
}

View File

@ -9,16 +9,17 @@
#include "sparse_solver.h"
template<typename T> void test_simplicial_cholesky_T()
template<typename T, typename I> void test_simplicial_cholesky_T()
{
SimplicialCholesky<SparseMatrix<T>, Lower> chol_colmajor_lower_amd;
SimplicialCholesky<SparseMatrix<T>, Upper> chol_colmajor_upper_amd;
SimplicialLLT<SparseMatrix<T>, Lower> llt_colmajor_lower_amd;
SimplicialLLT<SparseMatrix<T>, Upper> llt_colmajor_upper_amd;
SimplicialLDLT<SparseMatrix<T>, Lower> ldlt_colmajor_lower_amd;
SimplicialLDLT<SparseMatrix<T>, Upper> ldlt_colmajor_upper_amd;
SimplicialLDLT<SparseMatrix<T>, Lower, NaturalOrdering<int> > ldlt_colmajor_lower_nat;
SimplicialLDLT<SparseMatrix<T>, Upper, NaturalOrdering<int> > ldlt_colmajor_upper_nat;
typedef SparseMatrix<T,0,I> SparseMatrixType;
SimplicialCholesky<SparseMatrixType, Lower> chol_colmajor_lower_amd;
SimplicialCholesky<SparseMatrixType, Upper> chol_colmajor_upper_amd;
SimplicialLLT< SparseMatrixType, Lower> llt_colmajor_lower_amd;
SimplicialLLT< SparseMatrixType, Upper> llt_colmajor_upper_amd;
SimplicialLDLT< SparseMatrixType, Lower> ldlt_colmajor_lower_amd;
SimplicialLDLT< SparseMatrixType, Upper> ldlt_colmajor_upper_amd;
SimplicialLDLT< SparseMatrixType, Lower, NaturalOrdering<I> > ldlt_colmajor_lower_nat;
SimplicialLDLT< SparseMatrixType, Upper, NaturalOrdering<I> > ldlt_colmajor_upper_nat;
check_sparse_spd_solving(chol_colmajor_lower_amd);
check_sparse_spd_solving(chol_colmajor_upper_amd);
@ -40,6 +41,7 @@ template<typename T> void test_simplicial_cholesky_T()
void test_simplicial_cholesky()
{
CALL_SUBTEST_1(test_simplicial_cholesky_T<double>());
CALL_SUBTEST_2(test_simplicial_cholesky_T<std::complex<double> >());
CALL_SUBTEST_1(( test_simplicial_cholesky_T<double,int>() ));
CALL_SUBTEST_2(( test_simplicial_cholesky_T<std::complex<double>, int>() ));
CALL_SUBTEST_3(( test_simplicial_cholesky_T<double,long int>() ));
}

View File

@ -53,15 +53,15 @@ enum {
* \param zeroCoords and nonzeroCoords allows to get the coordinate lists of the non zero,
* and zero coefficients respectively.
*/
template<typename Scalar,int Opt1,int Opt2,typename Index> void
template<typename Scalar,int Opt1,int Opt2,typename StorageIndex> void
initSparse(double density,
Matrix<Scalar,Dynamic,Dynamic,Opt1>& refMat,
SparseMatrix<Scalar,Opt2,Index>& sparseMat,
SparseMatrix<Scalar,Opt2,StorageIndex>& sparseMat,
int flags = 0,
std::vector<Matrix<Index,2,1> >* zeroCoords = 0,
std::vector<Matrix<Index,2,1> >* nonzeroCoords = 0)
std::vector<Matrix<StorageIndex,2,1> >* zeroCoords = 0,
std::vector<Matrix<StorageIndex,2,1> >* nonzeroCoords = 0)
{
enum { IsRowMajor = SparseMatrix<Scalar,Opt2,Index>::IsRowMajor };
enum { IsRowMajor = SparseMatrix<Scalar,Opt2,StorageIndex>::IsRowMajor };
sparseMat.setZero();
//sparseMat.reserve(int(refMat.rows()*refMat.cols()*density));
sparseMat.reserve(VectorXi::Constant(IsRowMajor ? refMat.rows() : refMat.cols(), int((1.5*density)*(IsRowMajor?refMat.cols():refMat.rows()))));
@ -93,11 +93,11 @@ initSparse(double density,
//sparseMat.insertBackByOuterInner(j,i) = v;
sparseMat.insertByOuterInner(j,i) = v;
if (nonzeroCoords)
nonzeroCoords->push_back(Matrix<Index,2,1> (ai,aj));
nonzeroCoords->push_back(Matrix<StorageIndex,2,1> (ai,aj));
}
else if (zeroCoords)
{
zeroCoords->push_back(Matrix<Index,2,1> (ai,aj));
zeroCoords->push_back(Matrix<StorageIndex,2,1> (ai,aj));
}
refMat(ai,aj) = v;
}

View File

@ -9,6 +9,9 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
static long g_realloc_count = 0;
#define EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN g_realloc_count++;
#include "sparse.h"
template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& ref)
@ -55,48 +58,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY_IS_APPROX(m, refMat);
// test InnerIterators and Block expressions
for (Index t=0; t<10; ++t)
{
Index j = internal::random<Index>(0,cols-1);
Index i = internal::random<Index>(0,rows-1);
Index w = internal::random<Index>(1,cols-j-1);
Index h = internal::random<Index>(1,rows-i-1);
VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c), refMat.block(i,j,h,w).col(c));
for(Index r=0; r<h; r++)
{
// FIXME col().coeff() not implemented yet
// VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
}
}
for(Index r=0; r<h; r++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
for(Index c=0; c<w; c++)
{
// FIXME row().coeff() not implemented yet
// VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
}
}
}
for(Index c=0; c<cols; c++)
{
VERIFY_IS_APPROX(m.col(c) + m.col(c), (m + m).col(c));
VERIFY_IS_APPROX(m.col(c) + m.col(c), refMat.col(c) + refMat.col(c));
}
for(Index r=0; r<rows; r++)
{
VERIFY_IS_APPROX(m.row(r) + m.row(r), (m + m).row(r));
VERIFY_IS_APPROX(m.row(r) + m.row(r), refMat.row(r) + refMat.row(r));
}
// test assertion
VERIFY_RAISES_ASSERT( m.coeffRef(-1,1) = 0 );
VERIFY_RAISES_ASSERT( m.coeffRef(0,m.cols()) = 0 );
@ -107,17 +68,31 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
DenseMatrix m1(rows,cols);
m1.setZero();
SparseMatrixType m2(rows,cols);
if(internal::random<int>()%2)
m2.reserve(VectorXi::Constant(m2.outerSize(), 2));
bool call_reserve = internal::random<int>()%2;
Index nnz = internal::random<int>(1,int(rows)/2);
if(call_reserve)
{
if(internal::random<int>()%2)
m2.reserve(VectorXi::Constant(m2.outerSize(), int(nnz)));
else
m2.reserve(m2.outerSize() * nnz);
}
g_realloc_count = 0;
for (Index j=0; j<cols; ++j)
{
for (Index k=0; k<rows/2; ++k)
for (Index k=0; k<nnz; ++k)
{
Index i = internal::random<Index>(0,rows-1);
if (m1.coeff(i,j)==Scalar(0))
m2.insert(i,j) = m1(i,j) = internal::random<Scalar>();
}
}
if(call_reserve && !SparseMatrixType::IsRowMajor)
{
VERIFY(g_realloc_count==0);
}
m2.finalize();
VERIFY_IS_APPROX(m2,m1);
}
@ -167,82 +142,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY_IS_APPROX(m2,m1);
}
// test innerVector()
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
Index j0 = internal::random<Index>(0,outer-1);
Index j1 = internal::random<Index>(0,outer-1);
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0));
else
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1));
else
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1));
SparseMatrixType m3(rows,cols);
m3.reserve(VectorXi::Constant(outer,int(inner/2)));
for(Index j=0; j<outer; ++j)
for(Index k=0; k<(std::min)(j,inner); ++k)
m3.insertByOuterInner(j,k) = k+1;
for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
m3.makeCompressed();
for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros());
// m2.innerVector(j0) = 2*m2.innerVector(j1);
// refMat2.col(j0) = 2*refMat2.col(j1);
// VERIFY_IS_APPROX(m2, refMat2);
}
// test innerVectors()
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
if(internal::random<float>(0,1)>0.5) m2.makeCompressed();
Index j0 = internal::random<Index>(0,outer-2);
Index j1 = internal::random<Index>(0,outer-2);
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
VERIFY_IS_APPROX(m2, refMat2);
VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros());
m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
if(SparseMatrixType::IsRowMajor)
refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval();
else
refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval();
VERIFY_IS_APPROX(m2, refMat2);
}
// test basic computations
{
DenseMatrix refM1 = DenseMatrix::Zero(rows, cols);
@ -313,40 +212,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY(m2.isApprox(m3));
}
// test generic blocks
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
Index j0 = internal::random<Index>(0,outer-2);
Index j1 = internal::random<Index>(0,outer-2);
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols));
else
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols),
refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols));
else
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
Index i = internal::random<Index>(0,m2.outerSize()-1);
if(SparseMatrixType::IsRowMajor) {
m2.innerVector(i) = m2.innerVector(i) * s1;
refMat2.row(i) = refMat2.row(i) * s1;
VERIFY_IS_APPROX(m2,refMat2);
} else {
m2.innerVector(i) = m2.innerVector(i) * s1;
refMat2.col(i) = refMat2.col(i) * s1;
VERIFY_IS_APPROX(m2,refMat2);
}
}
// test prune
{
SparseMatrixType m2(rows, cols);
@ -575,7 +440,7 @@ void big_sparse_triplet(Index rows, Index cols, double density) {
void test_sparse_basic()
{
for(int i = 0; i < g_repeat; i++) {
int r = Eigen::internal::random<int>(1,100), c = Eigen::internal::random<int>(1,100);
int r = Eigen::internal::random<int>(1,200), c = Eigen::internal::random<int>(1,200);
if(Eigen::internal::random<int>(0,4) == 0) {
r = c; // check square matrices in 25% of tries
}
@ -585,11 +450,17 @@ void test_sparse_basic()
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, ColMajor>(r, c)) ));
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, RowMajor>(r, c)) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(r, c)) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,long int>(r, c)) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,long int>(r, c)) ));
CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,ColMajor,long int>(r, c)) ));
CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,RowMajor,long int>(r, c)) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
r = Eigen::internal::random<int>(1,100);
c = Eigen::internal::random<int>(1,100);
if(Eigen::internal::random<int>(0,4) == 0) {
r = c; // check square matrices in 25% of tries
}
CALL_SUBTEST_6(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
CALL_SUBTEST_6(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
}
// Regression test for bug 900: (manually insert higher values here, if you have enough RAM):

254
test/sparse_block.cpp Normal file
View File

@ -0,0 +1,254 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "sparse.h"
template<typename SparseMatrixType> void sparse_block(const SparseMatrixType& ref)
{
const Index rows = ref.rows();
const Index cols = ref.cols();
const Index inner = ref.innerSize();
const Index outer = ref.outerSize();
typedef typename SparseMatrixType::Scalar Scalar;
double density = (std::max)(8./(rows*cols), 0.01);
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
typedef Matrix<Scalar,1,Dynamic> RowDenseVector;
Scalar s1 = internal::random<Scalar>();
{
SparseMatrixType m(rows, cols);
DenseMatrix refMat = DenseMatrix::Zero(rows, cols);
initSparse<Scalar>(density, refMat, m);
VERIFY_IS_APPROX(m, refMat);
// test InnerIterators and Block expressions
for (int t=0; t<10; ++t)
{
Index j = internal::random<Index>(0,cols-2);
Index i = internal::random<Index>(0,rows-2);
Index w = internal::random<Index>(1,cols-j);
Index h = internal::random<Index>(1,rows-i);
VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c), refMat.block(i,j,h,w).col(c));
for(Index r=0; r<h; r++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
VERIFY_IS_APPROX(m.block(i,j,h,w).coeff(r,c), refMat.block(i,j,h,w).coeff(r,c));
}
}
for(Index r=0; r<h; r++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
VERIFY_IS_APPROX(m.block(i,j,h,w).coeff(r,c), refMat.block(i,j,h,w).coeff(r,c));
}
}
VERIFY_IS_APPROX(m.middleCols(j,w), refMat.middleCols(j,w));
VERIFY_IS_APPROX(m.middleRows(i,h), refMat.middleRows(i,h));
for(Index r=0; r<h; r++)
{
VERIFY_IS_APPROX(m.middleCols(j,w).row(r), refMat.middleCols(j,w).row(r));
VERIFY_IS_APPROX(m.middleRows(i,h).row(r), refMat.middleRows(i,h).row(r));
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.col(c).coeff(r), refMat.col(c).coeff(r));
VERIFY_IS_APPROX(m.row(r).coeff(c), refMat.row(r).coeff(c));
VERIFY_IS_APPROX(m.middleCols(j,w).coeff(r,c), refMat.middleCols(j,w).coeff(r,c));
VERIFY_IS_APPROX(m.middleRows(i,h).coeff(r,c), refMat.middleRows(i,h).coeff(r,c));
if(m.middleCols(j,w).coeff(r,c) != Scalar(0))
{
VERIFY_IS_APPROX(m.middleCols(j,w).coeffRef(r,c), refMat.middleCols(j,w).coeff(r,c));
}
if(m.middleRows(i,h).coeff(r,c) != Scalar(0))
{
VERIFY_IS_APPROX(m.middleRows(i,h).coeff(r,c), refMat.middleRows(i,h).coeff(r,c));
}
}
}
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.middleCols(j,w).col(c), refMat.middleCols(j,w).col(c));
VERIFY_IS_APPROX(m.middleRows(i,h).col(c), refMat.middleRows(i,h).col(c));
}
}
for(Index c=0; c<cols; c++)
{
VERIFY_IS_APPROX(m.col(c) + m.col(c), (m + m).col(c));
VERIFY_IS_APPROX(m.col(c) + m.col(c), refMat.col(c) + refMat.col(c));
}
for(Index r=0; r<rows; r++)
{
VERIFY_IS_APPROX(m.row(r) + m.row(r), (m + m).row(r));
VERIFY_IS_APPROX(m.row(r) + m.row(r), refMat.row(r) + refMat.row(r));
}
}
// test innerVector()
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
Index j0 = internal::random<Index>(0,outer-1);
Index j1 = internal::random<Index>(0,outer-1);
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0));
else
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1));
else
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1));
SparseMatrixType m3(rows,cols);
m3.reserve(VectorXi::Constant(outer,int(inner/2)));
for(Index j=0; j<outer; ++j)
for(Index k=0; k<(std::min)(j,inner); ++k)
m3.insertByOuterInner(j,k) = k+1;
for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
m3.makeCompressed();
for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros());
// m2.innerVector(j0) = 2*m2.innerVector(j1);
// refMat2.col(j0) = 2*refMat2.col(j1);
// VERIFY_IS_APPROX(m2, refMat2);
}
// test innerVectors()
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
if(internal::random<float>(0,1)>0.5) m2.makeCompressed();
Index j0 = internal::random<Index>(0,outer-2);
Index j1 = internal::random<Index>(0,outer-2);
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
VERIFY_IS_APPROX(m2, refMat2);
VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros());
m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
if(SparseMatrixType::IsRowMajor)
refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval();
else
refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval();
VERIFY_IS_APPROX(m2, refMat2);
}
// test generic blocks
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
Index j0 = internal::random<Index>(0,outer-2);
Index j1 = internal::random<Index>(0,outer-2);
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols));
else
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols),
refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols));
else
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
Index i = internal::random<Index>(0,m2.outerSize()-1);
if(SparseMatrixType::IsRowMajor) {
m2.innerVector(i) = m2.innerVector(i) * s1;
refMat2.row(i) = refMat2.row(i) * s1;
VERIFY_IS_APPROX(m2,refMat2);
} else {
m2.innerVector(i) = m2.innerVector(i) * s1;
refMat2.col(i) = refMat2.col(i) * s1;
VERIFY_IS_APPROX(m2,refMat2);
}
Index r0 = internal::random<Index>(0,rows-2);
Index c0 = internal::random<Index>(0,cols-2);
Index r1 = internal::random<Index>(1,rows-r0);
Index c1 = internal::random<Index>(1,cols-c0);
VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0));
VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0));
VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0));
VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0));
VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1));
VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1));
}
}
void test_sparse_block()
{
for(int i = 0; i < g_repeat; i++) {
int r = Eigen::internal::random<int>(1,200), c = Eigen::internal::random<int>(1,200);
if(Eigen::internal::random<int>(0,4) == 0) {
r = c; // check square matrices in 25% of tries
}
EIGEN_UNUSED_VARIABLE(r+c);
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(1, 1)) ));
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(8, 8)) ));
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(r, c)) ));
CALL_SUBTEST_2(( sparse_block(SparseMatrix<std::complex<double>, ColMajor>(r, c)) ));
CALL_SUBTEST_2(( sparse_block(SparseMatrix<std::complex<double>, RowMajor>(r, c)) ));
CALL_SUBTEST_3(( sparse_block(SparseMatrix<double,ColMajor,long int>(r, c)) ));
CALL_SUBTEST_3(( sparse_block(SparseMatrix<double,RowMajor,long int>(r, c)) ));
r = Eigen::internal::random<int>(1,100);
c = Eigen::internal::random<int>(1,100);
if(Eigen::internal::random<int>(0,4) == 0) {
r = c; // check square matrices in 25% of tries
}
CALL_SUBTEST_4(( sparse_block(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
CALL_SUBTEST_4(( sparse_block(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
}
}
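
For reference, the block API exercised by this new test is used the same way in user code. A minimal sketch, assuming a small column-major matrix; the sizes and values below are purely illustrative:

#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseMatrix<double> m(5, 5);   // column-major by default
  m.insert(1, 1) = 2.0;
  m.insert(2, 3) = 4.0;
  m.makeCompressed();

  // Writable access goes through inner vectors, exactly as in sparse_block():
  m.innerVector(1) = m.innerVector(1) * 3.0;   // scale column 1

  // A column can be copied into a dense vector, as the test does via DenseVector(m2.col(c0)):
  Eigen::VectorXd c1 = Eigen::VectorXd(m.col(1));
  std::cout << c1.transpose() << std::endl;
  return 0;
}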

View File

@ -17,9 +17,9 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A,
typedef typename Mat::Scalar Scalar;
typedef typename Mat::StorageIndex StorageIndex;
DenseRhs refX = dA.lu().solve(db);
DenseRhs refX = dA.householderQr().solve(db);
{
Rhs x(b.rows(), b.cols());
Rhs x(A.cols(), b.cols());
Rhs oldb = b;
solver.compute(A);
@ -94,7 +94,7 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A,
// test dense Block as the result and rhs:
{
DenseRhs x(db.rows(), db.cols());
DenseRhs x(refX.rows(), refX.cols());
DenseRhs oldb(db);
x.setZero();
x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols()));
@ -119,7 +119,7 @@ void check_sparse_solving_real_cases(Solver& solver, const typename Solver::Matr
typedef typename Mat::Scalar Scalar;
typedef typename Mat::RealScalar RealScalar;
Rhs x(b.rows(), b.cols());
Rhs x(A.cols(), b.cols());
solver.compute(A);
if (solver.info() != Success)
@ -230,7 +230,7 @@ template<typename Solver> void check_sparse_spd_solving(Solver& solver)
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
typedef SparseMatrix<Scalar,ColMajor> SpMat;
typedef SparseMatrix<Scalar,ColMajor, typename Mat::StorageIndex> SpMat;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
@ -304,12 +304,12 @@ template<typename Solver> void check_sparse_spd_determinant(Solver& solver)
}
template<typename Solver, typename DenseMat>
int generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag)
Index generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag)
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
int size = internal::random<int>(1,maxSize);
Index size = internal::random<int>(1,maxSize);
double density = (std::max)(8./(size*size), 0.01);
A.resize(size,size);
@ -324,7 +324,7 @@ template<typename Solver> void check_sparse_square_solving(Solver& solver)
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
typedef SparseMatrix<Scalar,ColMajor> SpMat;
typedef SparseMatrix<Scalar,ColMajor, typename Mat::StorageIndex> SpMat;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
@ -333,7 +333,7 @@ template<typename Solver> void check_sparse_square_solving(Solver& solver)
Mat A;
DenseMatrix dA;
for (int i = 0; i < g_repeat; i++) {
int size = generate_sparse_square_problem(solver, A, dA);
Index size = generate_sparse_square_problem(solver, A, dA);
A.makeCompressed();
DenseVector b = DenseVector::Random(size);
@ -410,3 +410,53 @@ template<typename Solver> void check_sparse_square_abs_determinant(Solver& solve
}
}
template<typename Solver, typename DenseMat>
void generate_sparse_leastsquare_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag)
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
int rows = internal::random<int>(1,maxSize);
int cols = internal::random<int>(1,rows);
double density = (std::max)(8./(rows*cols), 0.01);
A.resize(rows,cols);
dA.resize(rows,cols);
initSparse<Scalar>(density, dA, A, options);
}
template<typename Solver> void check_sparse_leastsquare_solving(Solver& solver)
{
typedef typename Solver::MatrixType Mat;
typedef typename Mat::Scalar Scalar;
typedef SparseMatrix<Scalar,ColMajor, typename Mat::StorageIndex> SpMat;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
int rhsCols = internal::random<int>(1,16);
Mat A;
DenseMatrix dA;
for (int i = 0; i < g_repeat; i++) {
generate_sparse_leastsquare_problem(solver, A, dA);
A.makeCompressed();
DenseVector b = DenseVector::Random(A.rows());
DenseMatrix dB(A.rows(),rhsCols);
SpMat B(A.rows(),rhsCols);
double density = (std::max)(8./(A.rows()*rhsCols), 0.1);
initSparse<Scalar>(density, dB, B, ForceNonZeroDiag);
B.makeCompressed();
check_sparse_solving(solver, A, b, dA, b);
check_sparse_solving(solver, A, dB, dA, dB);
check_sparse_solving(solver, A, B, dA, dB);
// check only once
if(i==0)
{
b = DenseVector::Zero(A.rows());
check_sparse_solving(solver, A, b, dA, b);
}
}
}

View File

@ -81,7 +81,7 @@ void construct_at_boundary(int boundary)
void unalignedassert()
{
#if EIGEN_ALIGN_STATICALLY
#if EIGEN_ALIGN_STATICALLY
construct_at_boundary<Vector2f>(4);
construct_at_boundary<Vector3f>(4);
construct_at_boundary<Vector4f>(16);
@ -100,7 +100,7 @@ void unalignedassert()
construct_at_boundary<Vector3cf>(4);
construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector3cd>(16);
#endif
#endif
check_unalignedassert_good<TestNew1>();
check_unalignedassert_good<TestNew2>();
@ -112,11 +112,12 @@ void unalignedassert()
check_unalignedassert_good<Depends<true> >();
#if EIGEN_ALIGN_STATICALLY
if(EIGEN_ALIGN_BYTES==16)
if(EIGEN_ALIGN_BYTES>=16)
{
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
}
for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8)
{

View File

@ -214,7 +214,7 @@ template<typename Scalar, bool Enable = internal::packet_traits<Scalar>::Vectori
>(DefaultTraversal,CompleteUnrolling)));
VERIFY((test_assign(Matrix11(), Matrix<Scalar,PacketSize,EIGEN_PLAIN_ENUM_MIN(2,PacketSize)>()*Matrix<Scalar,EIGEN_PLAIN_ENUM_MIN(2,PacketSize),PacketSize>(),
PacketSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD?DefaultTraversal:InnerVectorizedTraversal, CompleteUnrolling)));
InnerVectorizedTraversal, CompleteUnrolling)));
#endif
VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3),

View File

@ -9,3 +9,4 @@ install(FILES
)
add_subdirectory(src)
add_subdirectory(CXX11)

View File

@ -0,0 +1,8 @@
set(Eigen_CXX11_HEADERS Core Tensor TensorSymmetry)
install(FILES
${Eigen_CXX11_HEADERS}
DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel
)
add_subdirectory(src)

View File

@ -0,0 +1,3 @@
add_subdirectory(Core)
add_subdirectory(Tensor)
add_subdirectory(TensorSymmetry)

View File

@ -0,0 +1 @@
add_subdirectory(util)

View File

@ -0,0 +1,6 @@
FILE(GLOB Eigen_CXX11_Core_util_SRCS "*.h")
INSTALL(FILES
${Eigen_CXX11_Core_util_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/Core/util COMPONENT Devel
)

View File

@ -0,0 +1,6 @@
FILE(GLOB Eigen_CXX11_Tensor_SRCS "*.h")
INSTALL(FILES
${Eigen_CXX11_Tensor_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/Tensor COMPONENT Devel
)

View File

@ -526,48 +526,101 @@ class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyA
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorLayoutSwapOp<Derived>
const TensorLayoutSwapOp<const Derived>
swap_layout() const {
return TensorLayoutSwapOp<const Derived>(derived());
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorLayoutSwapOp<Derived>
swap_layout() {
return TensorLayoutSwapOp<Derived>(derived());
}
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorConcatenationOp<const Axis, const Derived, const OtherDerived>
concatenate(const OtherDerived& other, const Axis& axis) const {
return TensorConcatenationOp<const Axis, const Derived, const OtherDerived>(derived(), other, axis);
}
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorConcatenationOp<const Axis, Derived, OtherDerived>
concatenate(const OtherDerived& other, const Axis& axis) const {
return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other.derived(), axis);
concatenate(const OtherDerived& other, const Axis& axis) {
return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other, axis);
}
template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReshapingOp<const NewDimensions, const Derived>
reshape(const NewDimensions& newDimensions) const {
return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions);
}
template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorReshapingOp<const NewDimensions, Derived>
reshape(const NewDimensions& newDimensions) const {
reshape(const NewDimensions& newDimensions) {
return TensorReshapingOp<const NewDimensions, Derived>(derived(), newDimensions);
}
template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorSlicingOp<const StartIndices, const Sizes, const Derived>
slice(const StartIndices& startIndices, const Sizes& sizes) const {
return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes);
}
template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorSlicingOp<const StartIndices, const Sizes, Derived>
slice(const StartIndices& startIndices, const Sizes& sizes) const {
slice(const StartIndices& startIndices, const Sizes& sizes) {
return TensorSlicingOp<const StartIndices, const Sizes, Derived>(derived(), startIndices, sizes);
}
template <DenseIndex DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorChippingOp<DimId, Derived>
const TensorChippingOp<DimId, const Derived>
chip(const Index offset) const {
return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId);
}
template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorChippingOp<DimId, Derived>
chip(const Index offset) {
return TensorChippingOp<DimId, Derived>(derived(), offset, DimId);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorChippingOp<Dynamic, const Derived>
chip(const Index offset, const Index dim) const {
return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorChippingOp<Dynamic, Derived>
chip(const Index offset, const Index dim) const {
chip(const Index offset, const Index dim) {
return TensorChippingOp<Dynamic, Derived>(derived(), offset, dim);
}
template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReverseOp<const ReverseDimensions, const Derived>
reverse(const ReverseDimensions& rev) const {
return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev);
}
template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorReverseOp<const ReverseDimensions, Derived>
reverse(const ReverseDimensions& rev) const {
reverse(const ReverseDimensions& rev) {
return TensorReverseOp<const ReverseDimensions, Derived>(derived(), rev);
}
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorShufflingOp<const Shuffle, const Derived>
shuffle(const Shuffle& shuffle) const {
return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
}
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorShufflingOp<const Shuffle, Derived>
shuffle(const Shuffle& shuffle) const {
shuffle(const Shuffle& shuffle) {
return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
}
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorStridingOp<const Strides, const Derived>
stride(const Strides& strides) const {
return TensorStridingOp<const Strides, const Derived>(derived(), strides);
}
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorStridingOp<const Strides, Derived>
stride(const Strides& strides) const {
stride(const Strides& strides) {
return TensorStridingOp<const Strides, Derived>(derived(), strides);
}
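
The split into const and non-const overloads above is what lets these views appear on the left-hand side of an assignment. A minimal sketch, assuming the unsupported CXX11 Tensor module; the tensor names and shapes are illustrative only:

#include <unsupported/Eigen/CXX11/Tensor>

int main()
{
  Eigen::Tensor<float, 3> t(2, 3, 4);
  t.setZero();
  Eigen::Tensor<float, 2> slice(3, 4);
  slice.setConstant(1.0f);

  // Non-const chip() returns a writable view, so a whole slice can be assigned:
  t.chip(0, 0) = slice;
  // A chip can also be read back into a plain tensor:
  Eigen::Tensor<float, 2> copy = t.chip(0, 0);
  return 0;
}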

View File

@ -21,8 +21,7 @@ namespace Eigen {
* Example:
* C.device(EIGEN_GPU) = A + B;
*
* Todo: thread pools.
* Todo: operator +=, -=, *= and so on.
* Todo: operator *= and /=.
*/
template <typename ExpressionType, typename DeviceType> class TensorDevice {
@ -50,6 +49,18 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
return *this;
}
template<typename OtherDerived>
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
typedef typename OtherDerived::Scalar Scalar;
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
Difference difference(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
Assign assign(m_expression, difference);
static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
return *this;
}
protected:
const DeviceType& m_device;
ExpressionType& m_expression;
@ -82,6 +93,18 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
return *this;
}
template<typename OtherDerived>
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
typedef typename OtherDerived::Scalar Scalar;
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
Difference difference(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
Assign assign(m_expression, difference);
static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
return *this;
}
protected:
const ThreadPoolDevice& m_device;
ExpressionType& m_expression;
@ -114,6 +137,18 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
return *this;
}
template<typename OtherDerived>
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
typedef typename OtherDerived::Scalar Scalar;
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
Difference difference(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
Assign assign(m_expression, difference);
static const bool Vectorize = TensorEvaluator<const Assign, GpuDevice>::PacketAccess;
internal::TensorExecutor<const Assign, GpuDevice, Vectorize>::run(assign, m_device);
return *this;
}
protected:
const GpuDevice& m_device;
ExpressionType m_expression;
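
The operator-= overloads added above follow the same pattern as the existing device assignment: build a cwise difference expression, wrap it in a TensorAssignOp, and hand it to TensorExecutor. A usage sketch, assuming the default device; names and sizes are illustrative:

#include <unsupported/Eigen/CXX11/Tensor>

int main()
{
  Eigen::Tensor<float, 2> A(64, 64), C(64, 64);
  A.setConstant(1.0f);
  C.setConstant(3.0f);

  Eigen::DefaultDevice dev;
  // In-place subtraction through a device, mirroring the documented
  // "C.device(EIGEN_GPU) = A + B" pattern; evaluates C = C - A via TensorExecutor.
  C.device(dev) -= A;
  return 0;
}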

View File

@ -77,7 +77,7 @@ template <typename T> struct MeanReducer
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * packet_traits<Packet>::size);
return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
}
protected:

View File

@ -30,14 +30,14 @@ std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccesso
typedef typename internal::remove_const<typename T::Scalar>::type Scalar;
typedef typename T::Index Index;
typedef typename TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Dimensions Dimensions;
const Index total_size = internal::array_prod(tensor.dimensions());
const Index total_size = tensor.dimensions().TotalSize();
// Print the tensor as a 1d vector or a 2d matrix.
if (internal::array_size<Dimensions>::value == 1) {
Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
os << array;
} else {
const Index first_dim = tensor.dimensions()[0];
const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
static const int layout = TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Layout;
Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
os << matrix;
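
The printing logic itself is unchanged: rank-1 tensors print as a column, higher ranks are flattened into a first_dim-by-(total_size/first_dim) matrix; the patch only switches to dimension helpers that also work for fixed-size dimension types. A small sketch of the resulting output path; the shape is just an example:

#include <iostream>
#include <unsupported/Eigen/CXX11/Tensor>

int main()
{
  Eigen::Tensor<float, 3> t(2, 3, 4);
  t.setConstant(1.0f);
  // Rank > 1: printed as a 2 x 12 matrix (first dimension by the product
  // of the remaining dimensions), per the operator<< in TensorIO.h above.
  std::cout << t << std::endl;
  return 0;
}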

View File

@ -65,7 +65,7 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_> >
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | LvalueBit,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit),
};
};
@ -80,7 +80,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_> >
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | LvalueBit,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit),
};
};
@ -97,7 +97,7 @@ struct traits<TensorMap<PlainObjectType, Options_> >
static const int Layout = BaseTraits::Layout;
enum {
Options = Options_,
Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
};
};
@ -113,7 +113,7 @@ struct traits<TensorRef<PlainObjectType> >
static const int Layout = BaseTraits::Layout;
enum {
Options = BaseTraits::Options,
Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
};
};

View File

@ -0,0 +1,8 @@
FILE(GLOB Eigen_CXX11_TensorSymmetry_SRCS "*.h")
INSTALL(FILES
${Eigen_CXX11_TensorSymmetry_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/TensorSymmetry COMPONENT Devel
)
add_subdirectory(util)

View File

@ -0,0 +1,6 @@
FILE(GLOB Eigen_CXX11_TensorSymmetry_util_SRCS "*.h")
INSTALL(FILES
${Eigen_CXX11_TensorSymmetry_util_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/TensorSymmetry/util COMPONENT Devel
)

View File

@ -141,20 +141,32 @@ int main()
public:
typedef mpfr::mpreal ResScalar;
enum {
Vectorizable = false,
LhsPacketSize = 1,
RhsPacketSize = 1,
ResPacketSize = 1,
NumberOfRegisters = 1,
nr = 1,
mr = 1,
LhsProgress = 1,
RhsProgress = 1
};
typedef ResScalar LhsPacket;
typedef ResScalar RhsPacket;
typedef ResScalar ResPacket;
};
template<typename Index, bool ConjugateLhs, bool ConjugateRhs>
struct gebp_kernel<mpfr::mpreal,mpfr::mpreal,Index,1,1,ConjugateLhs,ConjugateRhs>
template<typename Index, typename DataMapper, bool ConjugateLhs, bool ConjugateRhs>
struct gebp_kernel<mpfr::mpreal,mpfr::mpreal,Index,DataMapper,1,1,ConjugateLhs,ConjugateRhs>
{
typedef mpfr::mpreal mpreal;
EIGEN_DONT_INLINE
void operator()(mpreal* res, Index resStride, const mpreal* blockA, const mpreal* blockB, Index rows, Index depth, Index cols, mpreal alpha,
void operator()(const DataMapper& res, const mpreal* blockA, const mpreal* blockB,
Index rows, Index depth, Index cols, const mpreal& alpha,
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0)
{
if(rows==0 || cols==0 || depth==0)
@ -170,8 +182,6 @@ int main()
{
for(Index j=0; j<cols; ++j)
{
mpreal *C1 = res + j*resStride;
const mpreal *A = blockA + i*strideA + offsetA;
const mpreal *B = blockB + j*strideB + offsetB;
@ -183,7 +193,7 @@ int main()
}
mpfr_mul(acc1.mpfr_ptr(), acc1.mpfr_srcptr(), alpha.mpfr_srcptr(), mpreal::get_default_rnd());
mpfr_add(C1[i].mpfr_ptr(), C1[i].mpfr_srcptr(), acc1.mpfr_srcptr(), mpreal::get_default_rnd());
mpfr_add(res(i,j).mpfr_ptr(), res(i,j).mpfr_srcptr(), acc1.mpfr_srcptr(), mpreal::get_default_rnd());
}
}
}

View File

@ -18,7 +18,7 @@ namespace Eigen {
namespace internal
{
template <typename Scalar>
inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, Scalar& value)
inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, Scalar& value)
{
line >> i >> j >> value;
i--;
@ -31,7 +31,7 @@ namespace internal
return false;
}
template <typename Scalar>
inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, std::complex<Scalar>& value)
inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, std::complex<Scalar>& value)
{
Scalar valR, valI;
line >> i >> j >> valR >> valI;

View File

@ -50,7 +50,7 @@ if(MPFR_FOUND)
include_directories(${MPFR_INCLUDES} ./mpreal)
ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES})
# ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" )
ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
endif()

View File

@ -54,7 +54,7 @@ static void test_equality()
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
if (random() < 0.5) {
if (internal::random<bool>()) {
mat2(i,j,k) = mat1(i,j,k);
}
}

Some files were not shown because too many files have changed in this diff.