Merged default into unary-array-cwise-functors

This commit is contained in:
Deanna Hood 2015-04-20 14:01:35 -04:00
commit 0250f4a9f2
102 changed files with 2595 additions and 946 deletions

View File

@ -168,6 +168,11 @@ if(NOT MSVC)
else()
ei_add_cxx_compiler_flag("-ansi")
endif()
if(ANDROID_NDK)
ei_add_cxx_compiler_flag("-pie")
ei_add_cxx_compiler_flag("-fPIE")
endif()
set(CMAKE_REQUIRED_FLAGS "")
@ -208,7 +213,7 @@ if(NOT MSVC)
endif()
option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
if(EIGEN_TEST_FMA)
if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
message(STATUS "Enabling FMA in tests/examples")
endif()
@ -227,7 +232,12 @@ if(NOT MSVC)
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mfloat-abi=softfp")
if(EIGEN_TEST_FMA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
message(STATUS "Enabling NEON in tests/examples")
endif()

View File

@ -308,6 +308,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/NEON/PacketMath.h"
#include "src/Core/arch/NEON/MathFunctions.h"
#include "src/Core/arch/NEON/Complex.h"
#include "src/Core/arch/NEON/BlockingSizesLookupTables.h"
#endif
#if defined EIGEN_VECTORIZE_CUDA
@ -381,6 +382,7 @@ using std::ptrdiff_t;
#include "src/Core/Inverse.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/products/LookupBlockingSizesTable.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/Parallelizer.h"
#include "src/Core/ProductEvaluators.h"

View File

@ -17,7 +17,7 @@
*
* These iterative solvers are associated with some preconditioners:
* - IdentityPreconditioner - not really useful
* - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices.
* - DiagonalPreconditioner - also called Jacobi preconditioner, works very well on diagonally dominant matrices.
* - IncompleteLUT - incomplete LU factorization with dual thresholding
*
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.

View File

@ -226,6 +226,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
/** \internal
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@ -424,6 +429,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();

View File

@ -170,6 +170,12 @@ template<typename _MatrixType, int _UpLo> class LLT
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
/** \internal
* Used to compute and store L
* The strict upper part is not used and even not initialized.
@ -377,6 +383,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();
eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
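Both Cholesky classes now share the same guard: instantiating compute() with an integer scalar trips EIGEN_STATIC_ASSERT_NON_INTEGER at compile time instead of failing obscurely deeper in the factorization. A minimal illustration (not part of the diff):

#include <Eigen/Cholesky>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd S = A * A.transpose();   // symmetric positive semi-definite
  Eigen::LLT<Eigen::MatrixXd> llt(S);      // fine: double is a non-integer scalar

  // Eigen::LLT<Eigen::MatrixXi> bad(S.cast<int>());
  // ^ would now fail to compile via EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
  return 0;
}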

View File

@ -647,11 +647,15 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ),
Flags0 = evaluator<PlainObjectType>::Flags,
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
@ -717,7 +721,10 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
@ -825,12 +832,16 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;
EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
// TODO: should check for smaller packet types once we can handle multi-sized packet types
const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar);
EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes)
// FIXME this should be an internal assertion
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned");
}
};
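The fixed EIGEN_ALIGN_BYTES requirement is replaced by an alignment derived from the actual packet type, so the check scales with the enabled SIMD width. A hedged compile-time spot check, assuming only SSE is enabled (4-float / 2-double packets):

#include <Eigen/Core>

// AlignBytes = packet_traits<Scalar>::size * sizeof(Scalar); under SSE both are 16.
static_assert(int(Eigen::internal::packet_traits<float>::size)  * sizeof(float)  == 16,
              "float packets span 16 bytes under SSE");
static_assert(int(Eigen::internal::packet_traits<double>::size) * sizeof(double) == 16,
              "double packets span 16 bytes under SSE");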

View File

@ -300,9 +300,10 @@ template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant
(const Scalar& val, const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
if(!internal::isApprox(this->coeff(i, j), val, prec))
if(!internal::isApprox(self.coeff(i, j), val, prec))
return false;
return true;
}
@ -484,9 +485,10 @@ DenseBase<Derived>::Zero()
template<typename Derived>
bool DenseBase<Derived>::isZero(const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
return false;
return true;
}
@ -719,18 +721,19 @@ template<typename Derived>
bool MatrixBase<Derived>::isIdentity
(const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
{
for(Index i = 0; i < rows(); ++i)
{
if(i == j)
{
if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
return false;
}
else
{
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
return false;
}
}
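The point of nested_eval in these predicates: a costly expression is materialized once before the coefficient loop instead of being re-evaluated at every coeff(i, j) probe. An illustrative use (not part of the diff):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(50, 50);
  // The product below is a lazy expression; nested_eval lets isIdentity()
  // evaluate it into a single temporary rather than once per coefficient.
  std::cout << (A * A.inverse()).isIdentity(1e-9) << "\n";  // prints 1 for a well-conditioned draw
  return 0;
}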

View File

@ -34,14 +34,35 @@ void check_static_allocation_size()
#endif
}
template<typename T, int Size, typename Packet = typename packet_traits<T>::type,
bool Match = bool((Size%unpacket_traits<Packet>::size)==0),
bool TryHalf = bool(int(unpacket_traits<Packet>::size) > 1)
&& bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) >
struct compute_default_alignment
{
enum { value = 0 };
};
template<typename T, int Size, typename Packet, bool TryHalf>
struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match
{
enum { value = sizeof(T) * unpacket_traits<Packet>::size };
};
template<typename T, int Size, typename Packet>
struct compute_default_alignment<T, Size, Packet, false, true> // Try-half
{
// current packet is too large, try with a half-packet
enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value };
};
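The trait recurses: when Size is not a multiple of the full packet width, it retries with the half packet, bottoming out at 0 when nothing matches. A hedged compile-time check, assuming AVX is enabled (Packet8f with a Packet4f half):

#include <Eigen/Core>

// 12 floats: not a multiple of the 8-wide packet, but a multiple of its 4-wide
// half, so the deduced alignment should be 4 * sizeof(float) == 16.
static_assert(Eigen::internal::compute_default_alignment<float, 12>::value == 16,
              "half-packet fallback yields 16-byte alignment");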
/** \internal
* Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned
* to a 16-byte boundary if the total size is a multiple of 16 bytes.
*/
template <typename T, int Size, int MatrixOrArrayOptions,
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
: (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
: 0 >
: compute_default_alignment<T,Size>::value >
struct plain_array
{
T array[Size];
@ -81,14 +102,71 @@ struct plain_array
#endif
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
struct plain_array<T, Size, MatrixOrArrayOptions, 8>
{
EIGEN_USER_ALIGN_DEFAULT T array[Size];
EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 32>
{
EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
check_static_allocation_size<T,Size>();
}
EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};
template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 64>
{
EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
check_static_allocation_size<T,Size>();
}

View File

@ -224,13 +224,13 @@ bool MatrixBase<Derived>::isOrthogonal
template<typename Derived>
bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
{
typename Derived::Nested nested(derived());
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index i = 0; i < cols(); ++i)
{
if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
return false;
for(Index j = 0; j < i; ++j)
if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
return false;
}
return true;

View File

@ -328,6 +328,7 @@ struct hypot_impl
p = _y;
qp = _x / p;
}
if(p==RealScalar(0)) return RealScalar(0);
return p * sqrt(RealScalar(1) + qp*qp);
}
};
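The new guard matters when both inputs are zero: previously qp was computed as 0/0 == NaN, so the final product returned 0 * sqrt(1 + NaN) == NaN instead of 0. A standalone model of the fixed branch (hypothetical helper, not Eigen's API):

#include <cmath>
#include <cassert>

double hypot_model(double x, double y) {
  double ax = std::abs(x), ay = std::abs(y);
  double p = ax > ay ? ax : ay;   // larger magnitude
  double q = ax > ay ? ay : ax;   // smaller magnitude
  if (p == 0.0) return 0.0;       // the added guard: avoids 0 * sqrt(1 + NaN)
  double qp = q / p;
  return p * std::sqrt(1.0 + qp * qp);
}

int main() {
  assert(hypot_model(0.0, 0.0) == 0.0);  // was NaN before the guard
  assert(hypot_model(3.0, 4.0) == 5.0);  // exact in IEEE double
  return 0;
}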
@ -560,48 +561,48 @@ struct random_default_impl<Scalar, false, false>
};
enum {
floor_log2_terminate,
floor_log2_move_up,
floor_log2_move_down,
floor_log2_bogus
meta_floor_log2_terminate,
meta_floor_log2_move_up,
meta_floor_log2_move_down,
meta_floor_log2_bogus
};
template<unsigned int n, int lower, int upper> struct floor_log2_selector
template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
{
enum { middle = (lower + upper) / 2,
value = (upper <= lower + 1) ? int(floor_log2_terminate)
: (n < (1 << middle)) ? int(floor_log2_move_down)
: (n==0) ? int(floor_log2_bogus)
: int(floor_log2_move_up)
value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
: (n < (1 << middle)) ? int(meta_floor_log2_move_down)
: (n==0) ? int(meta_floor_log2_bogus)
: int(meta_floor_log2_move_up)
};
};
template<unsigned int n,
int lower = 0,
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
int selector = floor_log2_selector<n, lower, upper>::value>
struct floor_log2 {};
int selector = meta_floor_log2_selector<n, lower, upper>::value>
struct meta_floor_log2 {};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_down>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
{
enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_up>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
{
enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_terminate>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
{
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
};
template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_bogus>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
{
// no value, error at compile time
};
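Rename aside, this is still a compile-time binary search over the bit positions of n. A few hedged spot checks (values follow from the definition):

#include <Eigen/Core>

static_assert(Eigen::internal::meta_floor_log2<1>::value    == 0,  "");
static_assert(Eigen::internal::meta_floor_log2<1024>::value == 10, "");
static_assert(Eigen::internal::meta_floor_log2<1025>::value == 10, "");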
@ -609,11 +610,24 @@ struct floor_log2<n, lower, upper, floor_log2_bogus>
template<typename Scalar>
struct random_default_impl<Scalar, false, true>
{
typedef typename NumTraits<Scalar>::NonInteger NonInteger;
static inline Scalar run(const Scalar& x, const Scalar& y)
{
return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1)));
{
using std::max;
using std::min;
typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
if(y<x)
return x;
std::size_t range = ScalarX(y)-ScalarX(x);
std::size_t offset = 0;
// rejection sampling
std::size_t divisor = (range+RAND_MAX-1)/(range+1);
std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);
do {
offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
} while (offset > range);
return Scalar(ScalarX(x) + offset);
}
static inline Scalar run()
@ -621,7 +635,7 @@ struct random_default_impl<Scalar, false, true>
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
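The rewritten integer path scales each rand() draw into [0, range] and rejects overshoots, keeping the distribution uniform where the old (y - x + 1) * rand() / (RAND_MAX + 1) formula could overflow or bias. A standalone model mirroring the diff's arithmetic (hypothetical helper; assumes a 64-bit size_t so the multiply cannot overflow):

#include <cstdlib>
#include <cstddef>

long random_in_range(long x, long y) {
  if (y < x) return x;
  std::size_t range      = std::size_t(y - x);
  std::size_t divisor    = (range + RAND_MAX - 1) / (range + 1);
  std::size_t multiplier = (range + RAND_MAX - 1) / std::size_t(RAND_MAX);
  std::size_t offset;
  do {                              // rejection sampling: retry on overshoot
    offset = (std::size_t(std::rand()) * multiplier) / divisor;
  } while (offset > range);
  return x + long(offset);
}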

View File

@ -409,7 +409,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
: (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
@ -484,7 +485,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
{
PacketScalar res;
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic,
Unroll ? InnerSize : Dynamic,
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
@ -527,7 +528,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
}
};
@ -537,12 +538,12 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
@ -551,7 +552,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
@ -559,14 +560,31 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
res = pset1<Packet>(0);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
res = pset1<Packet>(0);
}
};
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < innerDim; ++i)
res = pset1<Packet>(0);
for(Index i = 0; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
@ -576,9 +594,8 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < innerDim; ++i)
res = pset1<Packet>(0);
for(Index i = 0; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
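Together with the InnerSize == 0 read-cost case above, the new zero-size specializations make products with an empty inner dimension well-defined: the empty sum is a zero packet, and the old "non initialized matrix" assertion is gone. A small sketch (not part of the diff):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXf A(3, 0), B(0, 3);  // inner dimension is zero
  Eigen::MatrixXf C = A * B;         // well-defined: a 3x3 zero matrix
  std::cout << C.isZero(0) << "\n";  // prints 1
  return 0;
}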
@ -678,8 +695,7 @@ public:
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
//(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0)
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)

View File

@ -200,17 +200,82 @@ DenseBase<Derived>::reverse() const
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional features:
* the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
* - this API enables reverse operations without the need for a temporary
* - it allows future optimizations (cache friendliness, etc.)
*
* \sa reverse() */
* \sa VectorwiseOp::reverseInPlace(), reverse() */
template<typename Derived>
inline void DenseBase<Derived>::reverseInPlace()
{
derived() = derived().reverse().eval();
if(cols()>rows())
{
Index half = cols()/2;
leftCols(half).swap(rightCols(half).reverse());
if((cols()%2)==1)
{
Index half2 = rows()/2;
col(half).head(half2).swap(col(half).tail(half2).reverse());
}
}
else
{
Index half = rows()/2;
topRows(half).swap(bottomRows(half).reverse());
if((rows()%2)==1)
{
Index half2 = cols()/2;
row(half).head(half2).swap(row(half).tail(half2).reverse());
}
}
}
namespace internal {
template<int Direction>
struct vectorwise_reverse_inplace_impl;
template<>
struct vectorwise_reverse_inplace_impl<Vertical>
{
template<typename ExpressionType>
static void run(ExpressionType &xpr)
{
Index half = xpr.rows()/2;
xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
}
};
template<>
struct vectorwise_reverse_inplace_impl<Horizontal>
{
template<typename ExpressionType>
static void run(ExpressionType &xpr)
{
Index half = xpr.cols()/2;
xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
}
};
} // end namespace internal
/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
*
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - this API enables reverse operations without the need for a temporary
*
* \sa DenseBase::reverseInPlace(), reverse() */
template<typename ExpressionType, int Direction>
void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
{
internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
}
} // end namespace Eigen
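The two in-place variants side by side; the half-swapping implementation above avoids the full temporary that m = m.reverse().eval() used to create (illustrative, not part of the diff):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXi m(2, 3);
  m << 1, 2, 3,
       4, 5, 6;

  Eigen::MatrixXi a = m;
  a.reverseInPlace();            // full reversal: a becomes [6 5 4; 3 2 1]

  Eigen::MatrixXi b = m;
  b.rowwise().reverseInPlace();  // new vectorwise variant: b becomes [3 2 1; 6 5 4]

  std::cout << a << "\n\n" << b << "\n";
  return 0;
}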

View File

@ -38,13 +38,17 @@ public:
template<int StoreMode, int LoadMode>
void assignPacket(Index row, Index col)
{
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
PacketScalar tmp = m_src.template packet<LoadMode>(row,col);
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode>(row,col));
m_dst.template writePacket<StoreMode>(row,col,tmp);
}
template<int StoreMode, int LoadMode>
void assignPacket(Index index)
{
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
PacketScalar tmp = m_src.template packet<LoadMode>(index);
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode>(index));
m_dst.template writePacket<StoreMode>(index,tmp);
}
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)

View File

@ -562,6 +562,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
void normalize() {
m_matrix = this->normalized();
}
inline void reverseInPlace();
/////////// Geometry module ///////////

View File

@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(cons
}
#endif
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) {
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
}
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) {
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
return make_double2(from[0*stride], from[1*stride]);
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) {
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
to[stride*2] = from.z;
to[stride*3] = from.w;
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) {
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
}
@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a)
}
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w));
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
}
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
return make_double2(abs(a.x), abs(a.y));
return make_double2(fabs(a.x), fabs(a.y));
}
template<> EIGEN_DEVICE_FUNC inline void
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
@ -279,7 +279,7 @@ ptranspose(PacketBlock<float4,4>& kernel) {
kernel.packet[3].z = tmp;
}
template<> EIGEN_DEVICE_FUNC inline void
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<double2,2>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;

View File

@ -0,0 +1,110 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
#define EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
namespace Eigen {
namespace internal {
/* The following lookup table was generated from measurements on a Nexus 5,
* which has a Qualcomm Krait 400 CPU. This is very representative of current
* 32bit (ARMv7) Android devices. On the other hand, I don't know how
* representative that is outside of these conditions. Accordingly,
* let's only use this lookup table on ARM 32bit on Android for now.
*
* Measurements were single-threaded, with Scalar=float, compiled with
* -mfpu=neon-vfpv4, so the pmadd instruction used was VFMA.F32.
*
* The device was cooled, allowing it to run at the max clock speed throughout.
* This may not be representative of real-world thermal conditions.
*
* The benchmark attempted to flush caches to test cold-cache performance.
*/
#if EIGEN_ARCH_ARM && EIGEN_OS_ANDROID
template<>
struct BlockingSizesLookupTable<float, float> {
static const size_t BaseSize = 16;
static const size_t NumSizes = 8;
static const unsigned short* Data() {
static const unsigned short data[512] = {
0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447,
0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456,
0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467,
0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x476, 0x478,
0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x476, 0x476,
0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x488,
0x474, 0x475, 0x476, 0x4a6, 0x496, 0x496, 0x495, 0x4a6,
0x474, 0x475, 0x466, 0x4a6, 0x497, 0x4a5, 0x496, 0x4a5,
0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b,
0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587,
0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x596, 0x597,
0x574, 0x565, 0x566, 0x596, 0x596, 0x5a6, 0x5a6, 0x5a6,
0x564, 0x565, 0x5a6, 0x596, 0x5a6, 0x5a6, 0x5a6, 0x5a6,
0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b,
0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b,
0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667,
0x654, 0x665, 0x676, 0x677, 0x678, 0x679, 0x67a, 0x675,
0x684, 0x675, 0x686, 0x687, 0x688, 0x688, 0x687, 0x686,
0x664, 0x685, 0x666, 0x677, 0x697, 0x696, 0x697, 0x697,
0x664, 0x665, 0x696, 0x696, 0x685, 0x6a6, 0x696, 0x696,
0x664, 0x675, 0x686, 0x696, 0x6a6, 0x696, 0x696, 0x696,
0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x747,
0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757,
0x764, 0x765, 0x756, 0x767, 0x768, 0x759, 0x75a, 0x766,
0x744, 0x755, 0x766, 0x777, 0x768, 0x759, 0x778, 0x777,
0x744, 0x745, 0x766, 0x777, 0x788, 0x786, 0x786, 0x788,
0x754, 0x755, 0x766, 0x787, 0x796, 0x796, 0x787, 0x796,
0x684, 0x695, 0x696, 0x6a6, 0x795, 0x786, 0x795, 0x796,
0x684, 0x695, 0x696, 0x795, 0x786, 0x796, 0x795, 0x796,
0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x848, 0x848,
0x844, 0x855, 0x846, 0x847, 0x848, 0x849, 0x855, 0x857,
0x844, 0x845, 0x846, 0x857, 0x848, 0x859, 0x866, 0x865,
0x844, 0x855, 0x846, 0x847, 0x878, 0x859, 0x877, 0x877,
0x844, 0x855, 0x846, 0x867, 0x886, 0x887, 0x885, 0x886,
0x784, 0x785, 0x786, 0x877, 0x897, 0x885, 0x896, 0x896,
0x684, 0x695, 0x686, 0x886, 0x885, 0x885, 0x886, 0x896,
0x694, 0x6a5, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896,
0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x847, 0x848,
0x954, 0x855, 0x856, 0x947, 0x858, 0x857, 0x858, 0x858,
0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x867, 0x867,
0x944, 0x975, 0x976, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x785, 0x886, 0x887, 0x886, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x796, 0x887, 0x897, 0x896, 0x896,
0x684, 0x695, 0x6a6, 0x886, 0x886, 0x896, 0x896, 0x896,
0x6a4, 0x6a5, 0x696, 0x896, 0x886, 0x896, 0x896, 0x896,
0xa44, 0xa45, 0xa46, 0xa47, 0x847, 0x848, 0x847, 0x848,
0xa44, 0xa45, 0x856, 0x857, 0x857, 0x857, 0x857, 0x857,
0xa44, 0xa65, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867,
0x774, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897,
0x684, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896,
0x684, 0x6a5, 0x6a5, 0x886, 0x886, 0x896, 0x896, 0x896,
0xb44, 0x845, 0x846, 0x847, 0x847, 0x945, 0x846, 0x946,
0xb54, 0x855, 0x856, 0x857, 0x857, 0x856, 0x857, 0x856,
0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x866, 0x867,
0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x885, 0x886, 0x787, 0x887, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x796, 0x886, 0x897, 0x897, 0x897,
0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896,
0x684, 0x685, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896
};
return data;
}
};
#endif
}
}
#endif // EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
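Each 16-bit entry packs log2 of the k/m/n blocking sizes into three nibbles, e.g. 0x444 decodes to k = m = n = 16. A small decoder mirroring the shifts used by the lookup code added later in this commit (illustrative helper):

#include <cstdio>

void decode_entry(unsigned short e) {
  int k = 1 << ((e & 0xf00) >> 8);
  int m = 1 << ((e & 0x0f0) >> 4);
  int n = 1 << ((e & 0x00f) >> 0);
  std::printf("k=%d m=%d n=%d\n", k, m, n);
}

int main() {
  decode_entry(0x444);  // k=16 m=16 n=16
  decode_entry(0x5a6);  // k=32 m=1024 n=64
  return 0;
}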

View File

@ -150,14 +150,6 @@ template<typename Scalar> struct swap_assign_op {
swap(a,const_cast<Scalar&>(b));
#endif
}
template<int LhsAlignment, int RhsAlignment, typename Packet>
EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
{
Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
}
};
template<typename Scalar>
struct functor_traits<swap_assign_op<Scalar> > {

View File

@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff
return a<=0 ? b : a;
}
#if EIGEN_ARCH_i386_OR_x86_64
const std::ptrdiff_t defaultL1CacheSize = 32*1024;
const std::ptrdiff_t defaultL2CacheSize = 256*1024;
const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
#else
const std::ptrdiff_t defaultL1CacheSize = 16*1024;
const std::ptrdiff_t defaultL2CacheSize = 512*1024;
const std::ptrdiff_t defaultL3CacheSize = 512*1024;
#endif
/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
{
static bool m_cache_sizes_initialized = false;
static std::ptrdiff_t m_l1CacheSize = 32*1024;
static std::ptrdiff_t m_l2CacheSize = 256*1024;
static std::ptrdiff_t m_l3CacheSize = 2*1024*1024;
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
static std::ptrdiff_t m_l3CacheSize = 0;
if(!m_cache_sizes_initialized)
{
int l1CacheSize, l2CacheSize, l3CacheSize;
queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024);
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024);
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024);
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
m_cache_sizes_initialized = true;
}
@ -64,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
}
}
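The lazily-initialized defaults can still be overridden through the public cache-size API, which routes through manage_caching_sizes. A hedged usage sketch (assuming the three-argument setCpuCacheSizes of this branch):

#include <Eigen/Core>
#include <iostream>

int main() {
  // Force specific cache sizes (in bytes); later blocking decisions use them.
  Eigen::setCpuCacheSizes(32 * 1024, 256 * 1024, 2 * 1024 * 1024);
  std::cout << "l1=" << Eigen::l1CacheSize()
            << " l2=" << Eigen::l2CacheSize() << "\n";
  return 0;
}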
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depends on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
/* Helper for computeProductBlockingSizes.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depend on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
EIGEN_UNUSED_VARIABLE(num_threads);
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
if (k > kr) k -= k % kr;
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
if (m > mr) m -= m % mr;
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
if (n > nr) n -= n % nr;
return;
}
#endif
// Explanations:
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
@ -261,16 +249,69 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
actual_lm = l2;
max_mc = 576;
}
Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
if (mc > Traits::mr) mc -= mc % Traits::mr;
else if (mc==0) return;
m = (m%mc)==0 ? mc
: (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));
}
}
}
inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
{
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
return true;
}
#else
EIGEN_UNUSED_VARIABLE(k)
EIGEN_UNUSED_VARIABLE(m)
EIGEN_UNUSED_VARIABLE(n)
#endif
return false;
}
/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms.
*
* The blocking size parameters may be evaluated:
* - either by a heuristic based on cache sizes;
* - or using a precomputed lookup table;
* - or using fixed prescribed values (for testing purposes).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
if (!useSpecificBlockingSizes(k, m, n)) {
if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) {
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
}
}
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
if (k > kr) k -= k % kr;
if (m > mr) m -= m % mr;
if (n > nr) n -= n % nr;
}
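After the refactor the entry point is a thin dispatcher: fixed test sizes win, then the lookup table, then the cache-size heuristic, and the result is rounded down to multiples of the register blocking (kr/mr/nr). A hedged query sketch (assumes the default std::ptrdiff_t Index):

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::DenseIndex k = 1000, m = 1000, n = 1000;
  Eigen::internal::computeProductBlockingSizes<float, float>(k, m, n, /*num_threads=*/1);
  std::cout << "kc=" << k << " mc=" << m << " nc=" << n << "\n";
  return 0;
}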
template<typename LhsScalar, typename RhsScalar>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
@ -339,11 +380,14 @@ public:
nr = 4,
// register block size along the M direction (currently, this one cannot be modified)
default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
// we assume 16 registers
mr = 3*LhsPacketSize,
// See bug 992, if the scalar type is not vectorizable but that EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined,
// then using 3*LhsPacketSize triggers non-implemented paths in syrk.
mr = Vectorizable ? 3*LhsPacketSize : default_mr,
#else
mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
mr = default_mr,
#endif
LhsProgress = LhsPacketSize,
@ -974,12 +1018,11 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
// However, if depth is too small, we can extend the number of rows of these horizontal panels.
// This actual number of rows is computed as follow:
const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
#else
const Index actual_panel_rows = (3*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) );
#endif
for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
{
const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
@ -1211,12 +1254,12 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
//---------- Process 2 * LhsProgress rows at once ----------
if(mr>=2*Traits::LhsProgress)
{
const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
#else
Index actual_panel_rows = (2*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) );
#endif
for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
{
Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);

View File

@ -0,0 +1,97 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
namespace Eigen {
namespace internal {
template <typename LhsScalar,
typename RhsScalar,
bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 >
struct LookupBlockingSizesFromTableImpl
{
static bool run(Index&, Index&, Index&, Index)
{
return false;
}
};
inline size_t floor_log2_helper(unsigned short& x, size_t offset)
{
unsigned short y = x >> offset;
if (y) {
x = y;
return offset;
} else {
return 0;
}
}
inline size_t floor_log2(unsigned short x)
{
return floor_log2_helper(x, 8)
+ floor_log2_helper(x, 4)
+ floor_log2_helper(x, 2)
+ floor_log2_helper(x, 1);
}
inline size_t ceil_log2(unsigned short x)
{
return x > 1 ? floor_log2(x - 1) + 1 : 0;
}
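These helpers compute an integer log2 by peeling off high bits in halves (shifts of 8, 4, 2, 1) and summing the successful shifts; ceil_log2 rounds up via floor_log2(x - 1) + 1. Hedged spot checks (assuming <Eigen/Core> pulls in this new header, as the Core changes above indicate):

#include <Eigen/Core>
#include <cassert>

int main() {
  using Eigen::internal::floor_log2;
  using Eigen::internal::ceil_log2;
  assert(floor_log2(1)  == 0);
  assert(floor_log2(17) == 4);  // 16 <= 17 < 32
  assert(ceil_log2(16)  == 4);
  assert(ceil_log2(17)  == 5);
  return 0;
}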
template <typename LhsScalar,
typename RhsScalar>
struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true>
{
static bool run(Index& k, Index& m, Index& n, Index)
{
using std::min;
using std::max;
typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table;
const unsigned short minsize = Table::BaseSize;
const unsigned short maxsize = minsize << (Table::NumSizes - 1);
const unsigned short k_clamped = max<unsigned short>(minsize, min<Index>(k, maxsize));
const unsigned short m_clamped = max<unsigned short>(minsize, min<Index>(m, maxsize));
const unsigned short n_clamped = max<unsigned short>(minsize, min<Index>(n, maxsize));
const size_t k_index = ceil_log2(k_clamped / minsize);
const size_t m_index = ceil_log2(m_clamped / minsize);
const size_t n_index = ceil_log2(n_clamped / minsize);
const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
const unsigned short table_entry = Table::Data()[index];
k = min<Index>(k, 1 << ((table_entry & 0xf00) >> 8));
m = min<Index>(m, 1 << ((table_entry & 0x0f0) >> 4));
n = min<Index>(n, 1 << ((table_entry & 0x00f) >> 0));
return true;
}
};
template <typename LhsScalar,
typename RhsScalar>
bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads)
{
if (num_threads > 1) {
// We don't currently have lookup tables recorded for multithread performance,
// and we have confirmed experimentally that our single-thread-recorded LUTs are
// poor for multithread performance, and our LUTs don't currently contain
// any annotation about multithread status (FIXME - we need that).
// So for now, we just early-return here.
return false;
}
return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads);
}
}
}
#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H

View File

@ -214,7 +214,7 @@ class blas_data_mapper {
}
template<typename SubPacket>
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const {
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}

View File

@ -287,6 +287,14 @@ struct stem_function
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef ComplexScalar type(ComplexScalar, int);
};
template <typename LhsScalar,
typename RhsScalar>
struct BlockingSizesLookupTable
{
static const size_t NumSizes = 0;
};
}
} // end namespace Eigen

View File

@ -213,7 +213,8 @@
#endif
/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
#if defined(__ANDROID__)
// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
#if defined(__ANDROID__) || defined(ANDROID)
#define EIGEN_OS_ANDROID 1
#else
#define EIGEN_OS_ANDROID 0
@ -318,6 +319,9 @@
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES,
// for instance with AVX, EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16-byte alignment is always enough,
// and 16-byte alignment is also enough for Vector4f.
#define EIGEN_ALIGN_BYTES 16
#ifdef EIGEN_DONT_ALIGN

View File

@ -159,13 +159,16 @@ class compute_matrix_evaluator_flags
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
aligned_bit =
(
((Options&DontAlign)==0)
&& (
#if EIGEN_ALIGN_STATICALLY
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
#else
0
#endif

View File

@ -234,6 +234,12 @@ template<typename _MatrixType> class ComplexEigenSolver
}
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
EigenvectorType m_eivec;
EigenvalueType m_eivalues;
ComplexSchur<MatrixType> m_schur;
@ -251,6 +257,8 @@ template<typename MatrixType>
ComplexEigenSolver<MatrixType>&
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
check_template_parameters();
// this code is inspired from Jampack
eigen_assert(matrix.cols() == matrix.rows());

View File

@ -299,6 +299,13 @@ template<typename _MatrixType> class EigenSolver
void doComputeEigenvectors();
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
}
MatrixType m_eivec;
EigenvalueType m_eivalues;
bool m_isInitialized;
@ -366,6 +373,8 @@ template<typename MatrixType>
EigenSolver<MatrixType>&
EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
check_template_parameters();
using std::sqrt;
using std::abs;
using numext::isfinite;
@ -408,7 +417,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
{
Scalar t0 = m_matT.coeff(i+1, i);
Scalar t1 = m_matT.coeff(i, i+1);
Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1)));
Scalar maxval = numext::maxi<Scalar>(abs(p),numext::maxi<Scalar>(abs(t0),abs(t1)));
t0 /= maxval;
t1 /= maxval;
Scalar p0 = p/maxval;
@ -599,7 +608,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
}
// Overflow control
Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
Scalar t = numext::maxi<Scalar>(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
if ((eps * t) * t > Scalar(1))
m_matT.block(i, n-1, size-i, 2) /= t;

View File

@ -263,6 +263,13 @@ template<typename _MatrixType> class GeneralizedEigenSolver
}
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
}
MatrixType m_eivec;
ComplexVectorType m_alphas;
VectorType m_betas;
@ -290,6 +297,8 @@ template<typename MatrixType>
GeneralizedEigenSolver<MatrixType>&
GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
{
check_template_parameters();
using std::sqrt;
using std::abs;
eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());

View File

@ -240,10 +240,10 @@ namespace Eigen {
m_S.coeffRef(i,j) = Scalar(0.0);
m_S.rightCols(dim-j-1).applyOnTheLeft(i-1,i,G.adjoint());
m_T.rightCols(dim-i+1).applyOnTheLeft(i-1,i,G.adjoint());
// update Q
if (m_computeQZ)
m_Q.applyOnTheRight(i-1,i,G);
}
// update Q
if (m_computeQZ)
m_Q.applyOnTheRight(i-1,i,G);
// kill T(i,i-1)
if(m_T.coeff(i,i-1)!=Scalar(0))
{
@ -251,10 +251,10 @@ namespace Eigen {
m_T.coeffRef(i,i-1) = Scalar(0.0);
m_S.applyOnTheRight(i,i-1,G);
m_T.topRows(i).applyOnTheRight(i,i-1,G);
// update Z
if (m_computeQZ)
m_Z.applyOnTheLeft(i,i-1,G.adjoint());
}
// update Z
if (m_computeQZ)
m_Z.applyOnTheLeft(i,i-1,G.adjoint());
}
}
}

View File

@ -347,6 +347,11 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
static const int m_maxIterations = 30;
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_eivec;
RealVectorType m_eivalues;
typename TridiagonalizationType::SubDiagonalType m_subdiag;
@ -382,6 +387,8 @@ EIGEN_DEVICE_FUNC
SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
::compute(const MatrixType& matrix, int options)
{
check_template_parameters();
using std::abs;
eigen_assert(matrix.cols() == matrix.rows());
eigen_assert((options&~(EigVecMask|GenEigMask))==0

View File

@ -161,8 +161,8 @@ class QuaternionBase : public RotationBase<Derived, 3>
bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
{ return coeffs().isApprox(other.coeffs(), prec); }
/** return the result vector of \a v through the rotation*/
EIGEN_STRONG_INLINE Vector3 _transformVector(Vector3 v) const;
/** return the result vector of \a v through the rotation*/
EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
/** \returns \c *this with scalar type casted to \a NewScalarType
*
@ -462,7 +462,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni
*/
template <class Derived>
EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
QuaternionBase<Derived>::_transformVector(Vector3 v) const
QuaternionBase<Derived>::_transformVector(const Vector3& v) const
{
// Note that this algorithm comes from the optimization by hand
// of the conversion to a Matrix followed by a Matrix/Vector product.

View File

@ -390,6 +390,12 @@ template<typename _MatrixType> class FullPivLU
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_lu;
PermutationPType m_p;
PermutationQType m_q;
@ -434,6 +440,8 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
template<typename MatrixType>
FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
// the permutations are stored as int indices, so just to be sure:
eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());

View File

@ -209,6 +209,12 @@ template<typename _MatrixType> class PartialPivLU
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_lu;
PermutationType m_p;
TranspositionType m_rowsTranspositions;
@ -425,6 +431,8 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
template<typename MatrixType>
PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
// the row permutation is stored as int indices, so just to be sure:
eigen_assert(matrix.rows()<NumTraits<int>::highest());

View File

@ -137,22 +137,27 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm
degree[i] = len[i]; // degree of node i
}
mark = internal::cs_wclear<StorageIndex>(0, 0, w, n); /* clear w */
elen[n] = -2; /* n is a dead element */
Cp[n] = -1; /* n is a root of assembly tree */
w[n] = 0; /* n is a dead element */
/* --- Initialize degree lists ------------------------------------------ */
for(i = 0; i < n; i++)
{
bool has_diag = false;
for(p = Cp[i]; p<Cp[i+1]; ++p)
if(Ci[p]==i)
{
has_diag = true;
break;
}
d = degree[i];
if(d == 0) /* node i is empty */
if(d == 1) /* node i is empty */
{
elen[i] = -2; /* element i is dead */
nel++;
Cp[i] = -1; /* i is a root of assembly tree */
w[i] = 0;
}
else if(d > dense) /* node i is dense */
else if(d > dense || !has_diag) /* node i is dense or has no structural diagonal element */
{
nv[i] = 0; /* absorb i into element n */
elen[i] = -1; /* node i is dead */
@ -168,6 +173,10 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm
}
}
elen[n] = -2; /* n is a dead element */
Cp[n] = -1; /* n is a root of assembly tree */
w[n] = 0; /* n is a dead element */
while (nel < n) /* while (selecting pivots) do */
{
/* --- Select node of minimum approximate degree -------------------- */

View File

@ -398,6 +398,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_qr;
HCoeffsType m_hCoeffs;
PermutationType m_colsPermutation;
@ -436,6 +442,8 @@ typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::logAbsDetermina
template<typename MatrixType>
ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
using std::abs;
Index rows = matrix.rows();
Index cols = matrix.cols();

View File

@ -380,6 +380,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_qr;
HCoeffsType m_hCoeffs;
IntDiagSizeVectorType m_rows_transpositions;
@ -419,6 +425,8 @@ typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::logAbsDetermin
template<typename MatrixType>
FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
using std::abs;
Index rows = matrix.rows();
Index cols = matrix.cols();

View File

@ -196,6 +196,12 @@ template<typename _MatrixType> class HouseholderQR
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
MatrixType m_qr;
HCoeffsType m_hCoeffs;
RowVectorType m_temp;
@ -348,6 +354,8 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c
template<typename MatrixType>
HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType& matrix)
{
check_template_parameters();
Index rows = matrix.rows();
Index cols = matrix.cols();
Index size = (std::min)(rows,cols);

View File

@ -84,6 +84,8 @@ public:
typedef Matrix<RealScalar, Dynamic, 1> VectorType;
typedef Array<RealScalar, Dynamic, 1> ArrayXr;
typedef Array<Index,1,Dynamic> ArrayXi;
typedef Ref<ArrayXr> ArrayRef;
typedef Ref<ArrayXi> IndicesRef;
/** \brief Default Constructor.
*
@ -159,21 +161,23 @@ private:
void allocate(Index rows, Index cols, unsigned int computationOptions);
void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, VectorType& singVals, ArrayXr& shifts, ArrayXr& mus);
void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat);
void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V);
void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus);
void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat);
void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V);
void deflation43(Index firstCol, Index shift, Index i, Index size);
void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
static void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
static RealScalar secularEq(RealScalar x, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift);
void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift);
protected:
MatrixXr m_naiveU, m_naiveV;
MatrixXr m_computed;
Index m_nRec;
ArrayXr m_workspace;
ArrayXi m_workspaceI;
int m_algoswap;
bool m_isTranspose, m_compU, m_compV;
@ -212,6 +216,9 @@ void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computati
else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );
if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);
m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);
m_workspaceI.resize(3*m_diagSize);
}// end allocate
template<typename MatrixType>
@ -223,6 +230,19 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
allocate(matrix.rows(), matrix.cols(), computationOptions);
using std::abs;
//**** step -1 - If the problem is too small, directly fall back to JacobiSVD and return
if(matrix.cols() < m_algoswap)
{
// FIXME this line involves temporaries
JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
if(computeU()) m_matrixU = jsvd.matrixU();
if(computeV()) m_matrixV = jsvd.matrixV();
m_singularValues = jsvd.singularValues();
m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
m_isInitialized = true;
return *this;
}
//**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
RealScalar scale = matrix.cwiseAbs().maxCoeff();
if(scale==RealScalar(0)) scale = RealScalar(1);
@ -231,11 +251,13 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
else copy = matrix/scale;
//**** step 1 - Bidiagonalization
// FIXME this line involves temporaries
internal::UpperBidiagonalization<MatrixX> bid(copy);
//**** step 2 - Divide & Conquer
m_naiveU.setZero();
m_naiveV.setZero();
// FIXME this line involves a temporary matrix
m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
m_computed.template bottomRows<1>().setZero();
divide(0, m_diagSize - 1, 0, 0, 0);
@ -257,6 +279,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
break;
}
}
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
// std::cout << "m_naiveU\n" << m_naiveU << "\n\n";
// std::cout << "m_naiveV\n" << m_naiveV << "\n\n";
@ -279,14 +302,14 @@ void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const Househol
Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();
m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
householderU.applyThisOnTheLeft(m_matrixU);
householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer
}
if (computeV())
{
Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();
m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
householderV.applyThisOnTheLeft(m_matrixV);
householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer
}
}
@ -307,7 +330,10 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
// If the matrices are large enough, let's exploit the sparse structure of A by
// splitting it in half (wrt n1), and packing the non-zero columns.
Index n2 = n - n1;
MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n);
Map<MatrixXr> A1(m_workspace.data() , n1, n);
Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n);
Map<MatrixXr> B1(m_workspace.data()+ n*n, n, n);
Map<MatrixXr> B2(m_workspace.data()+2*n*n, n, n);
Index k1=0, k2=0;
for(Index j=0; j<n; ++j)
{
@ -329,7 +355,11 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
}
else
A *= B; // FIXME this requires a temporary
{
Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n);
tmp.noalias() = A*B;
A = tmp;
}
}
// The divide algorithm is done "in place": we are always working on subsets of the same matrix. The divide method takes as arguments the
@ -360,7 +390,8 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
// matrices.
if (n < m_algoswap)
{
JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)) ;
// FIXME this line involves temporaries
JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
if (m_compU)
m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
else
@ -438,7 +469,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
}
else
{
RealScalar q1 = (m_naiveU(0, firstCol + k));
RealScalar q1 = m_naiveU(0, firstCol + k);
// we shift Q1 to the right
for (Index i = firstCol + k - 1; i >= firstCol; i--)
m_naiveU(0, i + 1) = m_naiveU(0, i);
@ -491,8 +522,14 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
assert(VofSVD.allFinite());
#endif
if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
else m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time
if (m_compU)
structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
else
{
Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1);
tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD;
m_naiveU.middleCols(firstCol, n + 1) = tmp;
}
if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);
@ -517,10 +554,9 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
{
// TODO Get rid of these copies (?)
// FIXME at least preallocate them
ArrayXr col0 = m_computed.col(firstCol).segment(firstCol, n);
ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal();
ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
ArrayRef diag = m_workspace.head(n);
diag(0) = 0;
// Allocate space for singular values and vectors
@ -539,13 +575,14 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
Index actual_n = n;
while(actual_n>1 && diag(actual_n-1)==0) --actual_n;
Index m = 0; // size of the deflated problem
ArrayXi perm(actual_n);
for(Index k=0;k<actual_n;++k)
if(col0(k)!=0)
perm(m++) = k;
perm.conservativeResize(m);
m_workspaceI(m++) = k;
Map<ArrayXi> perm(m_workspaceI.data(),m);
ArrayXr shifts(n), mus(n), zhat(n);
Map<ArrayXr> shifts(m_workspace.data()+1*n, n);
Map<ArrayXr> mus(m_workspace.data()+2*n, n);
Map<ArrayXr> zhat(m_workspace.data()+3*n, n);
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << "computeSVDofM using:\n";
@ -622,8 +659,8 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
// Reverse order so that singular values are in increasing order
// Because of deflation, the zero singular values are already at the end
singVals.head(actual_n).reverseInPlace();
U.leftCols(actual_n) = U.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary
if (m_compV) V.leftCols(actual_n) = V.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary
U.leftCols(actual_n).rowwise().reverseInPlace();
if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace();
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) );
@ -634,7 +671,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
}
template <typename MatrixType>
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift)
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
{
Index m = perm.size();
RealScalar res = 1;
@ -647,8 +684,8 @@ typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar
}
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm,
VectorType& singVals, ArrayXr& shifts, ArrayXr& mus)
void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm,
VectorType& singVals, ArrayRef shifts, ArrayRef mus)
{
using std::abs;
using std::swap;
@ -703,7 +740,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right;
// measure everything relative to shift
ArrayXr diagShifted = diag - shift;
Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
diagShifted = diag - shift;
// initial guess
RealScalar muPrev, muCur;
@ -730,7 +768,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
// rational interpolation: fit a function of the form a / mu + b through the two previous
// iterates and use its zero to compute the next iterate
bool useBisection = fPrev*fCur>0;
while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
{
++m_numIters;
@ -773,7 +811,10 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
}
RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE
RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
#endif
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
if(!(fLeft * fRight<0))
@ -781,14 +822,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
#endif
eigen_internal_assert(fLeft * fRight < 0);
while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted)))
while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
{
RealScalar midShifted = (leftShifted + rightShifted) / 2;
RealScalar fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
if (fLeft * fMid < 0)
{
rightShifted = midShifted;
fRight = fMid;
}
else
{
@ -816,8 +856,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
// zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)
template <typename MatrixType>
void BDCSVD<MatrixType>::perturbCol0
(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals,
const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat)
(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat)
{
using std::sqrt;
Index n = col0.size();
@ -865,8 +905,8 @@ void BDCSVD<MatrixType>::perturbCol0
// compute singular vectors
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSingVecs
(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals,
const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V)
(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V)
{
Index n = zhat.size();
Index m = perm.size();
@ -991,7 +1031,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
RealScalar epsilon_strict = NumTraits<RealScalar>::epsilon() * maxDiag;
RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag);
RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
#ifdef EIGEN_BDCSVD_SANITY_CHECKS
assert(m_naiveU.allFinite());
@ -1047,7 +1087,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
// Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
// First, compute the respective permutation.
Index *permutation = new Index[length]; // FIXME avoid repeated dynamic memory allocation
Index *permutation = m_workspaceI.data();
{
permutation[0] = 0;
Index p = 1;
@ -1084,8 +1124,8 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
}
// Current index of each col, and current column of each index
Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation
Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation
Index *realInd = m_workspaceI.data()+length;
Index *realCol = m_workspaceI.data()+2*length;
for(int pos = 0; pos< length; pos++)
{
@ -1115,9 +1155,6 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
realInd[J] = realI;
realInd[i] = pi;
}
delete[] permutation;
delete[] realInd;
delete[] realCol;
}
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n";

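Most of the BDCSVD changes above share one theme: the temporaries flagged by FIXME comments are replaced with Map views into the preallocated m_workspace / m_workspaceI buffers. A standalone sketch of the idiom (the function and its arguments are illustrative, not Eigen API):

#include <Eigen/Core>
#include <cassert>

using Eigen::Map; using Eigen::MatrixXd; using Eigen::VectorXd;

// Overwrite A with A*B without allocating, reusing caller-provided scratch memory.
void multiply_in_workspace(MatrixXd& A, const MatrixXd& B, VectorXd& workspace) {
  const Eigen::Index n = A.rows();
  assert(A.cols() == n && B.rows() == n && B.cols() == n);
  assert(workspace.size() >= n * n);
  Map<MatrixXd> tmp(workspace.data(), n, n);  // a view, no allocation
  tmp.noalias() = A * B;                      // noalias: write straight into the view
  A = tmp;
}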
View File

@ -425,12 +425,13 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
// If d!=0, then t/d cannot overflow because the magnitude of the
// entries forming d are not too small compared to the ones forming t.
RealScalar u = t / d;
rot1.s() = RealScalar(1) / sqrt(RealScalar(1) + numext::abs2(u));
rot1.c() = rot1.s() * u;
RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
rot1.s() = RealScalar(1) / tmp;
rot1.c() = u / tmp;
}
m.applyOnTheLeft(0,1,rot1);
j_right->makeJacobi(m,0,1);
*j_left = rot1 * j_right->transpose();
*j_left = rot1 * j_right->transpose();
}
template<typename _MatrixType, int QRPreconditioner>
@ -680,6 +681,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();
// limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
// FIXME What about considering any denormal numbers as zero, using:
// const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min();
// Scaling factor to reduce over/under-flows
@ -719,8 +722,9 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
// if this 2x2 sub-matrix is not diagonal already...
// notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
// keep us iterating forever. Similarly, small denormal numbers are considered zero.
RealScalar threshold = numext::maxi(considerAsZero, precision * numext::maxi(abs(m_workMatrix.coeff(p,p)),
abs(m_workMatrix.coeff(q,q))));
RealScalar threshold = numext::maxi<RealScalar>(considerAsZero,
precision * numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)),
abs(m_workMatrix.coeff(q,q))));
// We compare both values to threshold instead of calling max to be robust to NaN (See bug 791)
if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
{

View File

@ -130,9 +130,10 @@ public:
inline Index rank() const
{
using std::abs;
using std::max;
eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
if(m_singularValues.size()==0) return 0;
RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold();
RealScalar premultiplied_threshold = (max)(m_singularValues.coeff(0) * threshold(), (std::numeric_limits<RealScalar>::min)());
Index i = m_nonzeroSingularValues-1;
while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
return i+1;
@ -217,6 +218,12 @@ public:
#endif
protected:
static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}
// return true if already allocated
bool allocate(Index rows, Index cols, unsigned int computationOptions);
@ -240,7 +247,9 @@ protected:
m_usePrescribedThreshold(false),
m_computationOptions(0),
m_rows(-1), m_cols(-1), m_diagSize(0)
{}
{
check_template_parameters();
}
};

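Clamping the premultiplied threshold from below by the smallest positive normalized value makes rank() robust when the largest singular value is zero or denormal. A small usage sketch of the intended behavior:

#include <Eigen/SVD>
#include <iostream>

int main() {
  Eigen::MatrixXf m = Eigen::MatrixXf::Zero(3, 3);
  Eigen::JacobiSVD<Eigen::MatrixXf> svd(m);  // singular values are always computed
  // the threshold is now at least std::numeric_limits<float>::min(), so the
  // exactly-zero singular values are not counted and the reported rank is 0
  std::cout << svd.rank() << std::endl;
}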
View File

@ -86,7 +86,12 @@ class CompressedStorage
void resize(Index size, double reserveSizeFactor = 0)
{
if (m_allocatedSize<size)
reallocate(size + Index(reserveSizeFactor*double(size)));
{
Index realloc_size = (std::min<Index>)(NumTraits<StorageIndex>::highest(), size + Index(reserveSizeFactor*double(size)));
if(realloc_size<size)
internal::throw_std_bad_alloc();
reallocate(realloc_size);
}
m_size = size;
}

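The new branch clamps the speculative reservation to the largest value representable by StorageIndex, and only fails when even the requested size itself does not fit. The logic in isolation (the Index/StorageIndex widths below are assumptions):

#include <algorithm>
#include <limits>
#include <new>

typedef long long Index;   // assumed wide size type
typedef int StorageIndex;  // assumed narrower stored-index type

Index clamped_realloc_size(Index size, double reserveSizeFactor) {
  Index wanted = size + Index(reserveSizeFactor * double(size));
  Index clamped = (std::min<Index>)(std::numeric_limits<StorageIndex>::max(), wanted);
  if (clamped < size)
    throw std::bad_alloc();  // stand-in for internal::throw_std_bad_alloc()
  return clamped;            // safe to reallocate this many entries
}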
View File

@ -30,16 +30,16 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
std::memset(mask,0,sizeof(bool)*rows);
typename evaluator<Lhs>::type lhsEval(lhs);
typename evaluator<Rhs>::type rhsEval(rhs);
// estimate the number of non-zero entries:
// given a rhs column containing Y non-zeros, we assume that the corresponding Y columns
// of the lhs differ on average by one non-zero, thus the number of non-zeros for
// the product of a rhs column with the lhs is X+Y, where X is the average number of
// non-zeros per column of the lhs.
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
typename evaluator<Lhs>::type lhsEval(lhs);
typename evaluator<Rhs>::type rhsEval(rhs);
Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();
res.setZero();
res.reserve(Index(estimated_nnz_prod));

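As a toy illustration of the heuristic (illustrative sizes, not a benchmark): two sparse 1000x1000 factors with 5000 non-zeros each lead to an initial reservation of 10000 entries for the product, grown on demand afterwards. The estimate itself is just:

#include <Eigen/Sparse>

typedef Eigen::SparseMatrix<double> SpMat;

// Sketch of the reservation derived before computing lhs*rhs.
Eigen::Index estimated_nnz_prod(const SpMat& lhs, const SpMat& rhs) {
  // heuristic from the comment above: nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
  return lhs.nonZeros() + rhs.nonZeros();
}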
View File

@ -49,6 +49,16 @@ public:
return nnz;
}
inline const Scalar coeff(Index row, Index col) const
{
return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
inline const Scalar coeff(Index index) const
{
return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
}
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
@ -80,7 +90,8 @@ class sparse_matrix_block_impl
typedef Block<SparseMatrixType, BlockRows, BlockCols, true> BlockType;
public:
enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
typedef SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> > Base;
_EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
protected:
typedef typename Base::IndexVector IndexVector;
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
@ -188,27 +199,31 @@ public:
{ return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; }
inline const StorageIndex* innerNonZeroPtr() const
{ return isCompressed() ? 0 : m_matrix.innerNonZeroPtr(); }
{ return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
inline StorageIndex* innerNonZeroPtr()
{ return isCompressed() ? 0 : m_matrix.const_cast_derived().innerNonZeroPtr(); }
Index nonZeros() const
{
if(m_matrix.isCompressed())
return ( (m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()])
- (m_matrix.outerIndexPtr()[m_outerStart]));
else if(m_outerSize.value()==0)
return 0;
else
return Map<const IndexVector>(m_matrix.innerNonZeroPtr()+m_outerStart, m_outerSize.value()).sum();
}
{ return isCompressed() ? 0 : (m_matrix.const_cast_derived().innerNonZeroPtr()+m_outerStart); }
bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
inline const Scalar coeff(Index row, Index col) const
{
return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}
inline const Scalar coeff(Index index) const
{
return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
}
const Scalar& lastCoeff() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(sparse_matrix_block_impl);
eigen_assert(nonZeros()>0);
eigen_assert(Base::nonZeros()>0);
if(m_matrix.isCompressed())
return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1];
else
@ -314,17 +329,6 @@ SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) const
}
namespace internal {
template< typename XprType, int BlockRows, int BlockCols, bool InnerPanel,
bool OuterVector = (BlockCols==1 && XprType::IsRowMajor)
| // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
// revert to || as soon as not needed anymore.
(BlockRows==1 && !XprType::IsRowMajor)>
class GenericSparseBlockInnerIteratorImpl;
}
/** Generic implementation of sparse Block expression.
* Real-only.
*/
@ -390,8 +394,11 @@ public:
Index blockCols() const { return m_blockCols.value(); }
protected:
friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
// friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
friend class ReverseInnerIterator;
friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
Index nonZeros() const { return Dynamic; }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
@ -404,94 +411,6 @@ public:
};
namespace internal {
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,false> : public Block<XprType, BlockRows, BlockCols, InnerPanel>::_MatrixTypeNested::InnerIterator
{
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
enum {
IsRowMajor = BlockType::IsRowMajor
};
typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
typedef typename BlockType::StorageIndex StorageIndex;
typedef typename _MatrixTypeNested::InnerIterator Base;
const BlockType& m_block;
Index m_end;
public:
EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer)
: Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
m_block(block),
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
{
while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
Base::operator++();
}
inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
inline Index row() const { return Base::row() - m_block.m_startRow.value(); }
inline Index col() const { return Base::col() - m_block.m_startCol.value(); }
inline operator bool() const { return Base::operator bool() && Base::index() < m_end; }
};
// Row vector of a column-major sparse matrix or column of a row-major one.
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,true>
{
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
enum {
IsRowMajor = BlockType::IsRowMajor
};
typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
typedef typename BlockType::StorageIndex StorageIndex;
typedef typename BlockType::Scalar Scalar;
const BlockType& m_block;
Index m_outerPos;
Index m_innerIndex;
Scalar m_value;
Index m_end;
public:
explicit EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
:
m_block(block),
m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry
m_innerIndex(IsRowMajor ? block.m_startRow.value() : block.m_startCol.value()),
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
{
EIGEN_UNUSED_VARIABLE(outer);
eigen_assert(outer==0);
++(*this);
}
inline Index index() const { return m_outerPos - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
inline Index outer() const { return 0; }
inline Index row() const { return IsRowMajor ? 0 : index(); }
inline Index col() const { return IsRowMajor ? index() : 0; }
inline Scalar value() const { return m_value; }
inline GenericSparseBlockInnerIteratorImpl& operator++()
{
// search next non-zero entry
while(++m_outerPos<m_end)
{
typename XprType::InnerIterator it(m_block.m_matrix, m_outerPos);
// search for the key m_innerIndex in the current outer-vector
while(it && it.index() < m_innerIndex) ++it;
if(it && it.index()==m_innerIndex)
{
m_value = it.value();
break;
}
}
return *this;
}
inline operator bool() const { return m_outerPos < m_end; }
};
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >
@ -523,9 +442,16 @@ struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBa
explicit unary_evaluator(const XprType& op)
: m_argImpl(op.nestedExpression()), m_block(op)
{}
inline Index nonZerosEstimate() const {
Index nnz = m_block.nonZeros();
if(nnz<0)
return m_argImpl.nonZerosEstimate() * m_block.size() / m_block.nestedExpression().size();
return nnz;
}
protected:
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
typename evaluator<ArgType>::nestedType m_argImpl;
const XprType &m_block;
@ -570,6 +496,7 @@ public:
: m_eval(aEval),
m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry
m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),
m_value(0),
m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
{
EIGEN_UNUSED_VARIABLE(outer);

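A generic sparse Block no longer reports an exact nonZeros() (it returns Dynamic), so the evaluator's nonZerosEstimate() above falls back to scaling the argument's estimate by the fraction of the matrix the block covers, implicitly assuming a uniform sparsity pattern. The arithmetic, isolated as a sketch:

// Fallback used when the block's exact non-zero count is unknown (Dynamic):
long block_nnz_estimate(long arg_nnz_estimate, long block_size, long full_size) {
  // assume non-zeros are spread uniformly, so the block holds its share of them
  return arg_nnz_estimate * block_size / full_size;
}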
View File

@ -35,6 +35,25 @@ class SparseCompressedBase
class InnerIterator;
class ReverseInnerIterator;
protected:
typedef typename Base::IndexVector IndexVector;
Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); }
const Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); }
public:
/** \returns the number of non zero coefficients */
inline Index nonZeros() const
{
if(isCompressed())
return outerIndexPtr()[derived().outerSize()]-outerIndexPtr()[0];
else if(derived().outerSize()==0)
return 0;
else
return innerNonZeros().sum();
}
/** \returns a const pointer to the array of values.
* This function is aimed at interoperability with other libraries.
* \sa innerIndexPtr(), outerIndexPtr() */
@ -165,6 +184,10 @@ struct evaluator<SparseCompressedBase<Derived> >
evaluator() : m_matrix(0) {}
explicit evaluator(const Derived &mat) : m_matrix(&mat) {}
inline Index nonZerosEstimate() const {
return m_matrix->nonZeros();
}
operator Derived&() { return m_matrix->const_cast_derived(); }
operator const Derived&() const { return *m_matrix; }

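The nonZeros() implementation hoisted into SparseCompressedBase covers both storage modes; a quick sketch of when each branch fires:

#include <Eigen/Sparse>
#include <iostream>

int main() {
  Eigen::SparseMatrix<double> m(4, 4);
  m.insert(0, 0) = 1.0;
  m.insert(2, 1) = 2.0;
  m.makeCompressed();                       // compressed: count from outerIndexPtr()
  std::cout << m.nonZeros() << std::endl;   // 2
  m.uncompress();                           // uncompressed: sum of innerNonZeros()
  std::cout << m.nonZeros() << std::endl;   // still 2
}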
View File

@ -121,6 +121,10 @@ public:
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
{ }
inline Index nonZerosEstimate() const {
return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate();
}
protected:
const BinaryOp m_functor;
@ -198,6 +202,10 @@ public:
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
{ }
inline Index nonZerosEstimate() const {
return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate());
}
protected:
const BinaryOp m_functor;
@ -243,7 +251,7 @@ public:
EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); }
EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; }
protected:
const LhsEvaluator &m_lhsEval;
RhsIterator m_rhsIter;
@ -262,6 +270,10 @@ public:
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
{ }
inline Index nonZerosEstimate() const {
return m_rhsImpl.nonZerosEstimate();
}
protected:
const BinaryOp m_functor;
@ -308,7 +320,7 @@ public:
EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }
EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }
protected:
LhsIterator m_lhsIter;
const RhsEvaluator &m_rhsEval;
@ -327,6 +339,10 @@ public:
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs())
{ }
inline Index nonZerosEstimate() const {
return m_lhsImpl.nonZerosEstimate();
}
protected:
const BinaryOp m_functor;

View File

@ -30,6 +30,10 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
};
explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {}
inline Index nonZerosEstimate() const {
return m_argImpl.nonZerosEstimate();
}
protected:
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;

View File

@ -105,9 +105,6 @@ class SparseMapBase<Derived,ReadOnlyAccessors>
return ((*r==inner) && (id<end)) ? m_values[id] : Scalar(0);
}
/** \returns the number of non zero coefficients */
inline Index nonZeros() const { return m_nnz; }
inline SparseMapBase(Index rows, Index cols, Index nnz, IndexPointer outerIndexPtr, IndexPointer innerIndexPtr,
ScalarPointer valuePtr, IndexPointer innerNonZerosPtr = 0)
: m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_nnz(nnz), m_outerIndex(outerIndexPtr),

View File

@ -95,6 +95,7 @@ class SparseMatrix
public:
typedef SparseCompressedBase<SparseMatrix> Base;
using Base::isCompressed;
using Base::nonZeros;
_EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix)
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=)
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=)
@ -122,9 +123,6 @@ class SparseMatrix
StorageIndex* m_outerIndex;
StorageIndex* m_innerNonZeros; // optional, if null then the data is compressed
Storage m_data;
Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); }
const Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); }
public:
@ -252,14 +250,6 @@ class SparseMatrix
memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex));
}
/** \returns the number of non zero coefficients */
inline Index nonZeros() const
{
if(m_innerNonZeros)
return innerNonZeros().sum();
return convert_index(Index(m_data.size()));
}
/** Preallocates \a reserveSize non zeros.
*
* Precondition: the matrix must be in compressed mode. */
@ -1172,8 +1162,12 @@ typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Op
return (m_data.value(p) = 0);
}
// make sure the matrix is compatible with random un-compressed insertion:
m_data.resize(m_data.allocatedSize());
if(m_data.size() != m_data.allocatedSize())
{
// make sure the matrix is compatible with random un-compressed insertion:
m_data.resize(m_data.allocatedSize());
this->reserveInnerVectors(Array<StorageIndex,Dynamic,1>::Constant(2*m_outerSize, convert_index(m_outerSize)));
}
return insertUncompressed(row,col);
}
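The fallback above lets scattered insert() calls succeed even without a prior reserve(), at the cost of a one-time blanket reservation. Explicit reservation remains the recommended pattern; a sketch:

#include <Eigen/Sparse>

int main() {
  Eigen::SparseMatrix<double> m(100, 100);
  // reserve a per-column budget so scattered insertions don't hit the fallback
  m.reserve(Eigen::VectorXi::Constant(100, 4));
  for (int k = 0; k < 100; ++k)
    m.insert((k * 37) % 100, k) = 1.0;  // arbitrary scattered pattern
  m.makeCompressed();
}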

View File

@ -149,9 +149,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
/** \returns the number of coefficients, which is \a rows()*cols().
* \sa rows(), cols(). */
inline Index size() const { return rows() * cols(); }
/** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */
inline Index nonZeros() const { return derived().nonZeros(); }
/** \returns true if either the number of rows or the number of columns is equal to 1.
* In other words, this function returns
* \code rows()==1 || cols()==1 \endcode

View File

@ -33,14 +33,6 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
// allocate a temporary buffer
AmbiVector<Scalar,StorageIndex> tempVector(rows);
// estimate the number of non-zero entries:
// given a rhs column containing Y non-zeros, we assume that the corresponding Y columns
// of the lhs differ on average by one non-zero, thus the number of non-zeros for
// the product of a rhs column with the lhs is X+Y, where X is the average number of
// non-zeros per column of the lhs.
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
// mimics a resizeByInnerOuter:
if(ResultType::IsRowMajor)
res.resize(cols, rows);
@ -49,6 +41,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
typename evaluator<Lhs>::type lhsEval(lhs);
typename evaluator<Rhs>::type rhsEval(rhs);
// estimate the number of non-zero entries:
// given a rhs column containing Y non-zeros, we assume that the corresponding Y columns
// of the lhs differ on average by one non-zero, thus the number of non-zeros for
// the product of a rhs column with the lhs is X+Y, where X is the average number of
// non-zeros per column of the lhs.
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();
res.reserve(estimated_nnz_prod);
double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols());

View File

@ -40,15 +40,11 @@ namespace internal {
};
}
// Implement nonZeros() for transpose. I'm not sure that's the best approach for that.
// Perhaps it should be implemented in Transpose<> itself.
template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>
: public internal::SparseTransposeImpl<MatrixType>
{
protected:
typedef internal::SparseTransposeImpl<MatrixType> Base;
public:
inline Index nonZeros() const { return Base::derived().nestedExpression().nonZeros(); }
};
namespace internal {
@ -61,6 +57,10 @@ struct unary_evaluator<Transpose<ArgType>, IteratorBased>
typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
public:
typedef Transpose<ArgType> XprType;
inline Index nonZerosEstimate() const {
return m_argImpl.nonZerosEstimate();
}
class InnerIterator : public EvalIterator
{

View File

@ -50,13 +50,6 @@ protected:
template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const;
template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const;
inline Index nonZeros() const {
// FIXME HACK number of nonZeros is required for product logic
// this returns only an upper bound (but should be OK for most purposes)
return derived().nestedExpression().nonZeros();
}
};
@ -191,6 +184,10 @@ public:
explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {}
inline Index nonZerosEstimate() const {
return m_argImpl.nonZerosEstimate();
}
class InnerIterator : public EvalIterator
{
typedef EvalIterator Base;

View File

@ -442,6 +442,10 @@ struct evaluator<SparseVector<_Scalar,_Options,_Index> >
explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {}
inline Index nonZerosEstimate() const {
return m_matrix.nonZeros();
}
operator SparseVectorType&() { return m_matrix.const_cast_derived(); }
operator const SparseVectorType&() const { return m_matrix; }

View File

@ -165,8 +165,9 @@ struct SluMatrix : SuperMatrix
}
template<typename MatrixType>
static SluMatrix Map(SparseMatrixBase<MatrixType>& mat)
static SluMatrix Map(SparseMatrixBase<MatrixType>& a_mat)
{
MatrixType &mat(a_mat.derived());
SluMatrix res;
if ((MatrixType::Flags&RowMajorBit)==RowMajorBit)
{
@ -184,9 +185,9 @@ struct SluMatrix : SuperMatrix
res.Mtype = SLU_GE;
res.storage.nnz = internal::convert_index<int>(mat.nonZeros());
res.storage.values = mat.derived().valuePtr();
res.storage.innerInd = mat.derived().innerIndexPtr();
res.storage.outerInd = mat.derived().outerIndexPtr();
res.storage.values = mat.valuePtr();
res.storage.innerInd = mat.innerIndexPtr();
res.storage.outerInd = mat.outerIndexPtr();
res.setScalarType<typename MatrixType::Scalar>();
@ -302,6 +303,7 @@ class SuperLUBase : public SparseSolverBase<Derived>
typedef Matrix<Scalar,Dynamic,1> Vector;
typedef Matrix<int, 1, MatrixType::ColsAtCompileTime> IntRowVectorType;
typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
typedef Map<PermutationMatrix<Dynamic,Dynamic,int> > PermutationMap;
typedef SparseMatrix<Scalar> LUMatrixType;
public:
@ -459,10 +461,11 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
typedef typename Base::RealScalar RealScalar;
typedef typename Base::StorageIndex StorageIndex;
typedef typename Base::IntRowVectorType IntRowVectorType;
typedef typename Base::IntColVectorType IntColVectorType;
typedef typename Base::IntColVectorType IntColVectorType;
typedef typename Base::PermutationMap PermutationMap;
typedef typename Base::LUMatrixType LUMatrixType;
typedef TriangularView<LUMatrixType, Lower|UnitDiag> LMatrixType;
typedef TriangularView<LUMatrixType, Upper> UMatrixType;
typedef TriangularView<LUMatrixType, Upper> UMatrixType;
public:
using Base::_solve_impl;
@ -774,6 +777,8 @@ typename SuperLU<MatrixType>::Scalar SuperLU<MatrixType>::determinant() const
det *= m_u.valuePtr()[lastId];
}
}
if(PermutationMap(m_p.data(),m_p.size()).determinant()*PermutationMap(m_q.data(),m_q.size()).determinant()<0)
det = -det;
if(m_sluEqued!='N')
return det/m_sluRscale.prod()/m_sluCscale.prod();
else

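The determinant fix accounts for the row and column permutations returned by SuperLU: each permutation contributes its parity, so the product of their determinants is +1 or -1. The sign logic in isolation (p and q are assumed raw int index arrays of length n, as m_p/m_q store them):

#include <Eigen/Core>

typedef Eigen::Map<Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> >
    PermutationMap;

double fix_determinant_sign(double det, int* p, int* q, int n) {
  // determinant() of a permutation matrix is its parity (+1 or -1)
  if (PermutationMap(p, n).determinant() * PermutationMap(q, n).determinant() < 0)
    det = -det;
  return det;
}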
View File

@ -25,6 +25,12 @@ using namespace std;
const int default_precision = 4;
// see --only-cubic-sizes
bool only_cubic_sizes = false;
// see --dump-tables
bool dump_tables = false;
uint8_t log2_pot(size_t x) {
size_t l = 0;
while (x >>= 1) l++;
@ -130,6 +136,9 @@ struct inputfile_t
cerr << "offending line:" << endl << line << endl;
exit(1);
}
if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
continue;
}
inputfile_entry_t entry;
entry.product_size = uint16_t(product_size);
entry.pot_block_size = uint16_t(block_size);
@ -155,6 +164,9 @@ struct inputfile_t
cerr << "offending line:" << endl << line << endl;
exit(1);
}
if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
continue;
}
inputfile_entry_t entry;
entry.product_size = uint16_t(product_size);
entry.pot_block_size = 0;
@ -309,14 +321,82 @@ float efficiency_of_subset(
efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry);
}
efficiency = min(efficiency, efficiency_this_product_size);
first_entry_index_with_this_product_size = entry_index;
product_size = first_file.entries[entry_index].product_size;
if (entry_index < num_entries) {
first_entry_index_with_this_product_size = entry_index;
product_size = first_file.entries[entry_index].product_size;
}
}
}
return efficiency;
}
void dump_table_for_subset(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<size_t>& subset)
{
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
const size_t num_entries = first_file.entries.size();
size_t entry_index = 0;
size_t first_entry_index_with_this_product_size = 0;
uint16_t product_size = first_file.entries[0].product_size;
size_t i = 0;
size_triple_t min_product_size(first_file.entries.front().product_size);
size_triple_t max_product_size(first_file.entries.back().product_size);
if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) {
abort();
}
if (only_cubic_sizes) {
cerr << "Can't generate tables with --only-cubic-sizes." << endl;
abort();
}
cout << "struct LookupTable {" << endl;
cout << " static const size_t BaseSize = " << min_product_size.k << ";" << endl;
const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
const size_t TableSize = NumSizes * NumSizes * NumSizes;
cout << " static const size_t NumSizes = " << NumSizes << ";" << endl;
cout << " static const unsigned short* Data() {" << endl;
cout << " static const unsigned short data[" << TableSize << "] = {";
while (entry_index < num_entries) {
++entry_index;
if (entry_index == num_entries ||
first_file.entries[entry_index].product_size != product_size)
{
float best_efficiency_this_product_size = 0.0f;
uint16_t best_block_size_this_product_size = 0;
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
float efficiency_this_entry = 1.0f;
for (auto i = subset.begin(); i != subset.end(); ++i) {
efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
}
if (efficiency_this_entry > best_efficiency_this_product_size) {
best_efficiency_this_product_size = efficiency_this_entry;
best_block_size_this_product_size = first_file.entries[e].block_size;
}
}
if ((i++) % NumSizes) {
cout << " ";
} else {
cout << endl << " ";
}
cout << "0x" << hex << best_block_size_this_product_size << dec;
if (entry_index < num_entries) {
cout << ",";
first_entry_index_with_this_product_size = entry_index;
product_size = first_file.entries[entry_index].product_size;
}
}
}
if (i != TableSize) {
cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl;
abort();
}
cout << endl << " };" << endl;
cout << " return data;" << endl;
cout << " }" << endl;
cout << "};" << endl;
}
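For reference, the emitted code has this shape (the numbers below are illustrative placeholders, not measured data):

// Illustrative output of --dump-tables, assuming cubic sizes 16..2048:
struct LookupTable {
  static const size_t BaseSize = 16;  // smallest benchmarked size k
  static const size_t NumSizes = 8;   // k in {16, 32, ..., 2048}, one power of two per axis
  static const unsigned short* Data() {
    // NumSizes^3 entries: the best block size per (rows, depth, cols) triple
    static const unsigned short data[8 * 8 * 8] = { /* 0x...-encoded block sizes */ };
    return data;
  }
};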
float efficiency_of_partition(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<vector<size_t>>& partition)
@ -498,6 +578,10 @@ void print_partition(
for (auto file = subset->begin(); file != subset->end(); ++file) {
cout << " " << preprocessed_inputfiles[*file].filename << endl;
}
if (dump_tables) {
cout << " Table:" << endl;
dump_table_for_subset(preprocessed_inputfiles, *subset);
}
}
cout << endl;
}
@ -505,28 +589,23 @@ void print_partition(
struct action_t
{
virtual const char* invokation_name() const { abort(); return nullptr; }
virtual void run(int, char*[]) const { abort(); }
virtual void run(const vector<string>&) const { abort(); }
virtual ~action_t() {}
};
struct partition_action_t : action_t
{
virtual const char* invokation_name() const { return "partition"; }
virtual void run(int argc, char *argv[]) const
virtual const char* invokation_name() const override { return "partition"; }
virtual void run(const vector<string>& input_filenames) const override
{
vector<preprocessed_inputfile_t> preprocessed_inputfiles;
if (!argc) {
if (input_filenames.empty()) {
cerr << "The " << invokation_name() << " action needs a list of input files." << endl;
exit(1);
}
vector<string> inputfilenames;
for (int i = 0; i < argc; i++) {
inputfilenames.emplace_back(argv[i]);
}
for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) {
for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) {
inputfile_t inputfile(*it);
switch (inputfile.type) {
case inputfile_t::type_t::all_pot_sizes:
@ -610,7 +689,7 @@ struct evaluate_defaults_action_t : action_t
static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
return e1.default_efficiency < e2.default_efficiency;
}
virtual const char* invokation_name() const { return "evaluate-defaults"; }
virtual const char* invokation_name() const override { return "evaluate-defaults"; }
void show_usage_and_exit() const
{
cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
@ -618,13 +697,13 @@ struct evaluate_defaults_action_t : action_t
<< "performance measured over all POT sizes." << endl;
exit(1);
}
virtual void run(int argc, char *argv[]) const
virtual void run(const vector<string>& input_filenames) const override
{
if (argc != 2) {
if (input_filenames.size() != 2) {
show_usage_and_exit();
}
inputfile_t inputfile_default_sizes(argv[0]);
inputfile_t inputfile_all_pot_sizes(argv[1]);
inputfile_t inputfile_default_sizes(input_filenames[0]);
inputfile_t inputfile_all_pot_sizes(input_filenames[1]);
if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) {
cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl;
show_usage_and_exit();
@ -719,7 +798,7 @@ struct evaluate_defaults_action_t : action_t
void show_usage_and_exit(int argc, char* argv[],
const vector<unique_ptr<action_t>>& available_actions)
{
cerr << "usage: " << argv[0] << " <action> <input files...>" << endl;
cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
cerr << "available actions:" << endl;
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
cerr << " " << (*it)->invokation_name() << endl;
@ -737,21 +816,61 @@ int main(int argc, char* argv[])
available_actions.emplace_back(new partition_action_t);
available_actions.emplace_back(new evaluate_defaults_action_t);
auto action = available_actions.end();
vector<string> input_filenames;
action_t* action = nullptr;
if (argc < 2) {
show_usage_and_exit(argc, argv, available_actions);
}
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
if (!strcmp(argv[1], (*it)->invokation_name())) {
action = it;
break;
for (int i = 1; i < argc; i++) {
bool arg_handled = false;
// Step 1. Try to match action invokation names.
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
if (!strcmp(argv[i], (*it)->invokation_name())) {
if (!action) {
action = it->get();
arg_handled = true;
break;
} else {
cerr << "can't specify more than one action!" << endl;
show_usage_and_exit(argc, argv, available_actions);
}
}
}
if (arg_handled) {
continue;
}
// Step 2. Try to match option names.
if (argv[i][0] == '-') {
if (!strcmp(argv[i], "--only-cubic-sizes")) {
only_cubic_sizes = true;
arg_handled = true;
}
if (!strcmp(argv[i], "--dump-tables")) {
dump_tables = true;
arg_handled = true;
}
if (!arg_handled) {
cerr << "Unrecognized option: " << argv[i] << endl;
show_usage_and_exit(argc, argv, available_actions);
}
}
if (arg_handled) {
continue;
}
// Step 3. Default to interpreting args as input filenames.
input_filenames.emplace_back(argv[i]);
}
if (action == available_actions.end()) {
if (dump_tables && only_cubic_sizes) {
cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl;
show_usage_and_exit(argc, argv, available_actions);
}
(*action)->run(argc - 2, argv + 2);
if (!action) {
show_usage_and_exit(argc, argv, available_actions);
}
action->run(input_filenames);
}

View File

@ -446,7 +446,7 @@ void try_run_some_benchmarks(
unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
if (seconds_to_sleep_if_lower_clock_speed > 30) {
if (seconds_to_sleep_if_lower_clock_speed > 32) {
cerr << "Sleeping longer probably won't make a difference." << endl;
cerr << "Serializing benchmarks to " << session_filename << endl;
serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
@ -456,7 +456,7 @@ void try_run_some_benchmarks(
rerun_last_tests = true;
cerr << "Sleeping "
<< seconds_to_sleep_if_lower_clock_speed
<< " s..." << endl;
<< " s... \r" << endl;
sleep(seconds_to_sleep_if_lower_clock_speed);
current_clock_speed = measure_clock_speed();
seconds_to_sleep_if_lower_clock_speed *= 2;

View File

@ -41,3 +41,5 @@ before-evaluators
6981:7e5d6f78da59 # dynamic loop swapping
6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache
6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1.
7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5)

View File

@ -26,7 +26,7 @@ macro(_metis_check_version)
string(REGEX MATCH "define[ \t]+METIS_VER_SUBMINOR[ \t]+([0-9]+)" _metis_subminor_version_match "${_metis_version_header}")
set(METIS_SUBMINOR_VERSION "${CMAKE_MATCH_1}")
if(NOT METIS_MAJOR_VERSION)
message(WARNING "Could not determine Metis version. Assuming version 4.0.0")
message(STATUS "Could not determine Metis version. Assuming version 4.0.0")
set(METIS_VERSION 4.0.0)
else()
set(METIS_VERSION ${METIS_MAJOR_VERSION}.${METIS_MINOR_VERSION}.${METIS_SUBMINOR_VERSION})

View File

@ -10,9 +10,10 @@ if(QT4_FOUND)
target_link_libraries(Tutorial_sparse_example ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${QT_QTCORE_LIBRARY} ${QT_QTGUI_LIBRARY})
add_custom_command(
TARGET Tutorial_sparse_example
POST_BUILD
COMMAND Tutorial_sparse_example ARGS ${CMAKE_CURRENT_BINARY_DIR}/../html/Tutorial_sparse_example.jpeg
TARGET Tutorial_sparse_example
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/../html/
COMMAND Tutorial_sparse_example ARGS ${CMAKE_CURRENT_BINARY_DIR}/../html/Tutorial_sparse_example.jpeg
)
add_dependencies(all_examples Tutorial_sparse_example)

View File

@ -47,6 +47,18 @@ ei_add_failtest("sparse_ref_3")
ei_add_failtest("sparse_ref_4")
ei_add_failtest("sparse_ref_5")
ei_add_failtest("partialpivlu_int")
ei_add_failtest("fullpivlu_int")
ei_add_failtest("llt_int")
ei_add_failtest("ldlt_int")
ei_add_failtest("qr_int")
ei_add_failtest("colpivqr_int")
ei_add_failtest("fullpivqr_int")
ei_add_failtest("jacobisvd_int")
ei_add_failtest("bdcsvd_int")
ei_add_failtest("eigensolver_int")
ei_add_failtest("eigensolver_cplx")
if (EIGEN_FAILTEST_FAILURE_COUNT)
message(FATAL_ERROR
"${EIGEN_FAILTEST_FAILURE_COUNT} out of ${EIGEN_FAILTEST_COUNT} failtests FAILED. "

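Each failtest added above is compiled twice by the failtest harness: once as-is, which must build, and once with EIGEN_SHOULD_FAIL_TO_BUILD defined, which must not. The new files below all instantiate a decomposition on an integer (or otherwise unsupported) matrix under that macro, so they share this skeleton:

// Shared pattern of the new failtests (SCALAR flips between the two builds):
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int    // rejected by the new EIGEN_STATIC_ASSERT_NON_INTEGER guards
#else
#define SCALAR float  // accepted
#endif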
failtest/bdcsvd_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/SVD"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
BDCSVD<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

failtest/colpivqr_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/QR"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
ColPivHouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/Eigenvalues"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR std::complex<double>
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
EigenSolver<Matrix<SCALAR,Dynamic,Dynamic> > eig(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/Eigenvalues"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
EigenSolver<Matrix<SCALAR,Dynamic,Dynamic> > eig(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/LU"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
FullPivLU<Matrix<SCALAR,Dynamic,Dynamic> > lu(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/QR"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
FullPivHouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/SVD"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
JacobiSVD<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

failtest/ldlt_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/Cholesky"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
LDLT<Matrix<SCALAR,Dynamic,Dynamic> > ldlt(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

failtest/llt_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/Cholesky"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
LLT<Matrix<SCALAR,Dynamic,Dynamic> > llt(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -0,0 +1,14 @@
#include "../Eigen/LU"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
PartialPivLU<Matrix<SCALAR,Dynamic,Dynamic> > lu(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

failtest/qr_int.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "../Eigen/QR"
#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif
using namespace Eigen;
int main()
{
HouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

View File

@ -139,6 +139,7 @@ endif(TEST_LIB)
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
add_custom_target(BuildOfficial)
ei_add_test(rand)
ei_add_test(meta)
ei_add_test(sizeof)
ei_add_test(dynalloc)
@ -226,6 +227,7 @@ ei_add_test(stdvector_overload)
ei_add_test(stdlist)
ei_add_test(stddeque)
ei_add_test(sparse_basic)
ei_add_test(sparse_block)
ei_add_test(sparse_vector)
ei_add_test(sparse_product)
ei_add_test(sparse_ref)
@ -330,3 +332,8 @@ endif(EIGEN_TEST_NVCC)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests)
add_test(NAME failtests WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests COMMAND ${CMAKE_COMMAND} ${Eigen_SOURCE_DIR} -G "${CMAKE_GENERATOR}" -DEIGEN_FAILTEST=ON)
option(EIGEN_TEST_BUILD_DOCUMENTATION "Test building the doxygen documentation" OFF)
IF(EIGEN_TEST_BUILD_DOCUMENTATION)
add_dependencies(buildtests doc)
ENDIF()

View File

@ -24,7 +24,7 @@ template<typename MatrixType> void reverse(const MatrixType& m)
// this test relies a lot on Random.h, and there's not much more that we can do
// to test it, hence I consider that we will have tested Random.h
MatrixType m1 = MatrixType::Random(rows, cols);
MatrixType m1 = MatrixType::Random(rows, cols), m2;
VectorType v1 = VectorType::Random(rows);
MatrixType m1_r = m1.reverse();
@ -96,6 +96,26 @@ template<typename MatrixType> void reverse(const MatrixType& m)
m1.reverse()(r, c) = x;
VERIFY_IS_APPROX(x, m1(rows - 1 - r, cols - 1 - c));
m2 = m1;
m2.reverseInPlace();
VERIFY_IS_APPROX(m2,m1.reverse().eval());
m2 = m1;
m2.col(0).reverseInPlace();
VERIFY_IS_APPROX(m2.col(0),m1.col(0).reverse().eval());
m2 = m1;
m2.row(0).reverseInPlace();
VERIFY_IS_APPROX(m2.row(0),m1.row(0).reverse().eval());
m2 = m1;
m2.rowwise().reverseInPlace();
VERIFY_IS_APPROX(m2,m1.rowwise().reverse().eval());
m2 = m1;
m2.colwise().reverseInPlace();
VERIFY_IS_APPROX(m2,m1.colwise().reverse().eval());
/*
m1.colwise().reverse()(r, c) = x;
@ -113,11 +133,11 @@ void test_array_reverse()
CALL_SUBTEST_2( reverse(Matrix2f()) );
CALL_SUBTEST_3( reverse(Matrix4f()) );
CALL_SUBTEST_4( reverse(Matrix4d()) );
CALL_SUBTEST_5( reverse(MatrixXcf(3, 3)) );
CALL_SUBTEST_6( reverse(MatrixXi(6, 3)) );
CALL_SUBTEST_7( reverse(MatrixXcd(20, 20)) );
CALL_SUBTEST_5( reverse(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_6( reverse(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_7( reverse(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_8( reverse(Matrix<float, 100, 100>()) );
CALL_SUBTEST_9( reverse(Matrix<float,Dynamic,Dynamic,RowMajor>(6,3)) );
CALL_SUBTEST_9( reverse(Matrix<float,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
}
#ifdef EIGEN_TEST_PART_3
Vector4f x; x << 1, 2, 3, 4;

View File

@ -17,6 +17,7 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m)
typedef Matrix<Scalar, Rows, 1> VectorType;
typedef Matrix<Scalar, 1, Cols> RowVectorType;
typedef Matrix<Scalar, Rows, Rows> SquareMatrixType;
typedef Matrix<Scalar, Dynamic, Dynamic> DynMatrixType;
typedef DiagonalMatrix<Scalar, Rows> LeftDiagonalMatrix;
typedef DiagonalMatrix<Scalar, Cols> RightDiagonalMatrix;
typedef Matrix<Scalar, Rows==Dynamic?Dynamic:2*Rows, Cols==Dynamic?Dynamic:2*Cols> BigMatrix;
@ -64,6 +65,13 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m)
VERIFY_IS_APPROX( (((v1+v2).asDiagonal() * (m1+m2))(i,j)) , (v1+v2)(i) * (m1+m2)(i,j) );
VERIFY_IS_APPROX( ((m1 * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * m1(i,j) );
VERIFY_IS_APPROX( (((m1+m2) * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * (m1+m2)(i,j) );
if(rows>1)
{
DynMatrixType tmp = m1.topRows(rows/2), res;
VERIFY_IS_APPROX( (res = m1.topRows(rows/2) * rv1.asDiagonal()), tmp * rv1.asDiagonal() );
VERIFY_IS_APPROX( (res = v1.head(rows/2).asDiagonal()*m1.topRows(rows/2)), v1.head(rows/2).asDiagonal()*tmp );
}
BigMatrix big;
big.setZero(2*rows, 2*cols);
@ -93,6 +101,17 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m)
VERIFY_IS_APPROX( (sq_m1 = (s1*v1).asDiagonal()), (s1*v1).asDiagonal().toDenseMatrix() );
}
template<int>
void bug987()
{
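// Regression sketch for bug 987 (intent inferred from the checks below):
// diagonal products applied directly to block expressions must match the
// same products with the block first evaluated into a plain temporary.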
Matrix3Xd points = Matrix3Xd::Random(3, 3);
Vector2d diag = Vector2d::Random();
Matrix2Xd tmp1 = points.topRows<2>(), res1, res2;
VERIFY_IS_APPROX( res1 = diag.asDiagonal() * points.topRows<2>(), res2 = diag.asDiagonal() * tmp1 );
Matrix2d tmp2 = points.topLeftCorner<2,2>();
VERIFY_IS_APPROX(( res1 = points.topLeftCorner<2,2>()*diag.asDiagonal()) , res2 = tmp2*diag.asDiagonal() );
}
void test_diagonalmatrices()
{
for(int i = 0; i < g_repeat; i++) {
@ -106,4 +125,5 @@ void test_diagonalmatrices()
CALL_SUBTEST_8( diagonalmatrices(Matrix<double,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
CALL_SUBTEST_9( diagonalmatrices(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
}
CALL_SUBTEST_10( bug987<0>() );
}

View File

@ -95,6 +95,9 @@
namespace Eigen
{
static std::vector<std::string> g_test_stack;
// level == 0 <=> abort if a test fails
// level >= 1 <=> print a warning message to std::cerr if a test fails
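// Illustrative usage (the same pattern appears in the SVD tests below):
//   ++g_test_level;          // subsequent failures only print a WARNING
//   VERIFY_IS_APPROX(a, b);  // non-critical check
//   --g_test_level;          // restore abort-on-failure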
static int g_test_level = 0;
static int g_repeat;
static unsigned int g_seed;
static bool g_has_set_repeat, g_has_set_seed;
@ -229,6 +232,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file,
{
if (!condition)
{
if(Eigen::g_test_level>0)
std::cerr << "WARNING: ";
std::cerr << "Test " << testname << " failed in " << file << " (" << line << ")"
<< std::endl << " " << condition_as_string << std::endl;
std::cerr << "Stack:\n";
@ -236,7 +241,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file,
for(int i=test_stack_size-1; i>=0; --i)
std::cerr << " - " << Eigen::g_test_stack[i] << "\n";
std::cerr << "\n";
abort();
if(Eigen::g_test_level==0)
abort();
}
}

View File

@ -113,6 +113,9 @@ void mat_mat_scalar_scalar_product()
template <typename MatrixType>
void zero_sized_objects(const MatrixType& m)
{
typedef typename MatrixType::Scalar Scalar;
const int PacketSize = internal::packet_traits<Scalar>::size;
const int PacketSize1 = PacketSize>1 ? PacketSize-1 : 1;
Index rows = m.rows();
Index cols = m.cols();
@ -132,9 +135,41 @@ void zero_sized_objects(const MatrixType& m)
res = b*a;
VERIFY(res.rows()==0 && res.cols()==cols);
}
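// The four blocks below check that products with a zero inner dimension
// yield an all-zero result, for both statically and dynamically sized
// operands, at PacketSize rows (vectorized path) and PacketSize-1 rows
// (presumably to exercise the partial-packet/scalar path).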
{
Matrix<Scalar,PacketSize,0> a;
Matrix<Scalar,0,1> b;
Matrix<Scalar,PacketSize,1> res;
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize,1) );
VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize,1) );
}
{
Matrix<Scalar,PacketSize1,0> a;
Matrix<Scalar,0,1> b;
Matrix<Scalar,PacketSize1,1> res;
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize1,1) );
VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize1,1) );
}
{
Matrix<Scalar,PacketSize,Dynamic> a(PacketSize,0);
Matrix<Scalar,Dynamic,1> b(0,1);
Matrix<Scalar,PacketSize,1> res;
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize,1) );
VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize,1) );
}
{
Matrix<Scalar,PacketSize1,Dynamic> a(PacketSize1,0);
Matrix<Scalar,Dynamic,1> b(0,1);
Matrix<Scalar,PacketSize1,1> res;
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize1,1) );
VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize1,1) );
}
}
template<int>
void bug_127()
{
// Bug 127
@ -159,6 +194,7 @@ void bug_127()
a*b;
}
template<int>
void unaligned_objects()
{
// Regression test for the bug reported here:
@ -188,6 +224,29 @@ void unaligned_objects()
}
}
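// Assumed intent: feed computeProductBlockingSizes every combination of
// zero and unit dimensions and check that it returns without crashing;
// the accumulated sum (and EIGEN_DONT_INLINE) keeps the calls from being
// optimized away.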
template<typename T>
EIGEN_DONT_INLINE
Index test_compute_block_size(Index m, Index n, Index k)
{
Index mc(m), nc(n), kc(k);
internal::computeProductBlockingSizes<T,T>(kc, mc, nc);
return kc+mc+nc;
}
template<typename T>
Index compute_block_size()
{
Index ret = 0;
ret += test_compute_block_size<T>(0,1,1);
ret += test_compute_block_size<T>(1,0,1);
ret += test_compute_block_size<T>(1,1,0);
ret += test_compute_block_size<T>(0,0,1);
ret += test_compute_block_size<T>(0,1,0);
ret += test_compute_block_size<T>(1,0,0);
ret += test_compute_block_size<T>(0,0,0);
return ret;
}
void test_product_extra()
{
for(int i = 0; i < g_repeat; i++) {
@ -198,6 +257,9 @@ void test_product_extra()
CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
}
CALL_SUBTEST_5( bug_127() );
CALL_SUBTEST_6( unaligned_objects() );
CALL_SUBTEST_5( bug_127<0>() );
CALL_SUBTEST_6( unaligned_objects<0>() );
CALL_SUBTEST_7( compute_block_size<float>() );
CALL_SUBTEST_7( compute_block_size<double>() );
CALL_SUBTEST_7( compute_block_size<std::complex<double> >() );
}

88
test/rand.cpp Normal file
View File

@ -0,0 +1,88 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
template<typename Scalar> Scalar check_in_range(Scalar x, Scalar y)
{
Scalar r = internal::random<Scalar>(x,y);
VERIFY(r>=x);
if(y>=x)
{
VERIFY(r<=y);
}
return r;
}
template<typename Scalar> void check_all_in_range(Scalar x, Scalar y)
{
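// One hit counter per value in [x,y]; with n = 32*(y-x+1) draws below,
// every counter must end up non-zero, i.e. every value in the closed
// range must be produced at least once.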
Array<int,1,Dynamic> mask(y-x+1);
mask.fill(0);
long n = (y-x+1)*32;
for(long k=0; k<n; ++k)
{
mask( check_in_range(x,y)-x )++;
}
VERIFY( (mask>0).all() );
}
void test_rand()
{
long long_ref = NumTraits<long>::highest()/10;
char char_offset = (std::min)(g_repeat,64);
short short_offset = (std::min)(g_repeat,16000);
for(int i = 0; i < g_repeat*10; i++) {
CALL_SUBTEST(check_in_range<float>(10,11));
CALL_SUBTEST(check_in_range<float>(1.24234523,1.24234523));
CALL_SUBTEST(check_in_range<float>(-1,1));
CALL_SUBTEST(check_in_range<float>(-1432.2352,-1432.2352));
CALL_SUBTEST(check_in_range<double>(10,11));
CALL_SUBTEST(check_in_range<double>(1.24234523,1.24234523));
CALL_SUBTEST(check_in_range<double>(-1,1));
CALL_SUBTEST(check_in_range<double>(-1432.2352,-1432.2352));
CALL_SUBTEST(check_in_range<int>(0,-1));
CALL_SUBTEST(check_in_range<short>(0,-1));
CALL_SUBTEST(check_in_range<long>(0,-1));
CALL_SUBTEST(check_in_range<int>(-673456,673456));
CALL_SUBTEST(check_in_range<short>(-24345,24345));
CALL_SUBTEST(check_in_range<long>(-long_ref,long_ref));
}
CALL_SUBTEST(check_all_in_range<char>(11,11));
CALL_SUBTEST(check_all_in_range<char>(11,11+char_offset));
CALL_SUBTEST(check_all_in_range<char>(-5,5));
CALL_SUBTEST(check_all_in_range<char>(-11-char_offset,-11));
CALL_SUBTEST(check_all_in_range<char>(-126,-126+char_offset));
CALL_SUBTEST(check_all_in_range<char>(126-char_offset,126));
CALL_SUBTEST(check_all_in_range<char>(-126,126));
CALL_SUBTEST(check_all_in_range<short>(11,11));
CALL_SUBTEST(check_all_in_range<short>(11,11+short_offset));
CALL_SUBTEST(check_all_in_range<short>(-5,5));
CALL_SUBTEST(check_all_in_range<short>(-11-short_offset,-11));
CALL_SUBTEST(check_all_in_range<short>(-24345,-24345+short_offset));
CALL_SUBTEST(check_all_in_range<short>(24345,24345+short_offset));
CALL_SUBTEST(check_all_in_range<int>(11,11));
CALL_SUBTEST(check_all_in_range<int>(11,11+g_repeat));
CALL_SUBTEST(check_all_in_range<int>(-5,5));
CALL_SUBTEST(check_all_in_range<int>(-11-g_repeat,-11));
CALL_SUBTEST(check_all_in_range<int>(-673456,-673456+g_repeat));
CALL_SUBTEST(check_all_in_range<int>(673456,673456+g_repeat));
CALL_SUBTEST(check_all_in_range<long>(11,11));
CALL_SUBTEST(check_all_in_range<long>(11,11+g_repeat));
CALL_SUBTEST(check_all_in_range<long>(-5,5));
CALL_SUBTEST(check_all_in_range<long>(-11-g_repeat,-11));
CALL_SUBTEST(check_all_in_range<long>(-long_ref,-long_ref+g_repeat));
CALL_SUBTEST(check_all_in_range<long>( long_ref, long_ref+g_repeat));
}

View File

@ -25,6 +25,22 @@ template<typename MatrixType> void real_qz(const MatrixType& m)
MatrixType A = MatrixType::Random(dim,dim),
B = MatrixType::Random(dim,dim);
// Regression test for bug 985: Randomly set rows or columns to zero
Index k=internal::random<Index>(0, dim-1);
switch(internal::random<int>(0,10)) {
case 0:
A.row(k).setZero(); break;
case 1:
A.col(k).setZero(); break;
case 2:
B.row(k).setZero(); break;
case 3:
B.col(k).setZero(); break;
default:
break;
}
RealQZ<MatrixType> qz(A,B);
VERIFY_IS_EQUAL(qz.info(), Success);

View File

@ -58,48 +58,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY_IS_APPROX(m, refMat);
// test InnerIterators and Block expressions
for (Index t=0; t<10; ++t)
{
Index j = internal::random<Index>(0,cols-1);
Index i = internal::random<Index>(0,rows-1);
Index w = internal::random<Index>(1,cols-j-1);
Index h = internal::random<Index>(1,rows-i-1);
VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c), refMat.block(i,j,h,w).col(c));
for(Index r=0; r<h; r++)
{
// FIXME col().coeff() not implemented yet
// VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
}
}
for(Index r=0; r<h; r++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
for(Index c=0; c<w; c++)
{
// FIXME row().coeff() not implemented yet
// VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
}
}
}
for(Index c=0; c<cols; c++)
{
VERIFY_IS_APPROX(m.col(c) + m.col(c), (m + m).col(c));
VERIFY_IS_APPROX(m.col(c) + m.col(c), refMat.col(c) + refMat.col(c));
}
for(Index r=0; r<rows; r++)
{
VERIFY_IS_APPROX(m.row(r) + m.row(r), (m + m).row(r));
VERIFY_IS_APPROX(m.row(r) + m.row(r), refMat.row(r) + refMat.row(r));
}
// test assertion
VERIFY_RAISES_ASSERT( m.coeffRef(-1,1) = 0 );
VERIFY_RAISES_ASSERT( m.coeffRef(0,m.cols()) = 0 );
@ -184,82 +142,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY_IS_APPROX(m2,m1);
}
// test innerVector()
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
Index j0 = internal::random<Index>(0,outer-1);
Index j1 = internal::random<Index>(0,outer-1);
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0));
else
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1));
else
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1));
SparseMatrixType m3(rows,cols);
m3.reserve(VectorXi::Constant(outer,int(inner/2)));
for(Index j=0; j<outer; ++j)
for(Index k=0; k<(std::min)(j,inner); ++k)
m3.insertByOuterInner(j,k) = k+1;
for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
m3.makeCompressed();
for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros());
// m2.innerVector(j0) = 2*m2.innerVector(j1);
// refMat2.col(j0) = 2*refMat2.col(j1);
// VERIFY_IS_APPROX(m2, refMat2);
}
// test innerVectors()
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
if(internal::random<float>(0,1)>0.5) m2.makeCompressed();
Index j0 = internal::random<Index>(0,outer-2);
Index j1 = internal::random<Index>(0,outer-2);
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
VERIFY_IS_APPROX(m2, refMat2);
VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros());
m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
if(SparseMatrixType::IsRowMajor)
refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval();
else
refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval();
VERIFY_IS_APPROX(m2, refMat2);
}
// test basic computations
{
DenseMatrix refM1 = DenseMatrix::Zero(rows, cols);
@ -330,40 +212,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY(m2.isApprox(m3));
}
// test generic blocks
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
Index j0 = internal::random<Index>(0,outer-2);
Index j1 = internal::random<Index>(0,outer-2);
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols));
else
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols),
refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols));
else
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
Index i = internal::random<Index>(0,m2.outerSize()-1);
if(SparseMatrixType::IsRowMajor) {
m2.innerVector(i) = m2.innerVector(i) * s1;
refMat2.row(i) = refMat2.row(i) * s1;
VERIFY_IS_APPROX(m2,refMat2);
} else {
m2.innerVector(i) = m2.innerVector(i) * s1;
refMat2.col(i) = refMat2.col(i) * s1;
VERIFY_IS_APPROX(m2,refMat2);
}
}
// test prune
{
SparseMatrixType m2(rows, cols);
@ -602,8 +450,8 @@ void test_sparse_basic()
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, ColMajor>(r, c)) ));
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, RowMajor>(r, c)) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(r, c)) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,long int>(r, c)) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,long int>(r, c)) ));
CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,ColMajor,long int>(r, c)) ));
CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,RowMajor,long int>(r, c)) ));
r = Eigen::internal::random<int>(1,100);
c = Eigen::internal::random<int>(1,100);
@ -611,8 +459,8 @@ void test_sparse_basic()
r = c; // check square matrices in 25% of tries
}
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
CALL_SUBTEST_6(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
CALL_SUBTEST_6(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
}
// Regression test for bug 900: (manually insert higher values here, if you have enough RAM):

254
test/sparse_block.cpp Normal file
View File

@ -0,0 +1,254 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "sparse.h"
template<typename SparseMatrixType> void sparse_block(const SparseMatrixType& ref)
{
const Index rows = ref.rows();
const Index cols = ref.cols();
const Index inner = ref.innerSize();
const Index outer = ref.outerSize();
typedef typename SparseMatrixType::Scalar Scalar;
double density = (std::max)(8./(rows*cols), 0.01);
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
typedef Matrix<Scalar,1,Dynamic> RowDenseVector;
Scalar s1 = internal::random<Scalar>();
{
SparseMatrixType m(rows, cols);
DenseMatrix refMat = DenseMatrix::Zero(rows, cols);
initSparse<Scalar>(density, refMat, m);
VERIFY_IS_APPROX(m, refMat);
// test InnerIterators and Block expressions
for (int t=0; t<10; ++t)
{
Index j = internal::random<Index>(0,cols-2);
Index i = internal::random<Index>(0,rows-2);
Index w = internal::random<Index>(1,cols-j);
Index h = internal::random<Index>(1,rows-i);
VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c), refMat.block(i,j,h,w).col(c));
for(Index r=0; r<h; r++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
VERIFY_IS_APPROX(m.block(i,j,h,w).coeff(r,c), refMat.block(i,j,h,w).coeff(r,c));
}
}
for(Index r=0; r<h; r++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
VERIFY_IS_APPROX(m.block(i,j,h,w).coeff(r,c), refMat.block(i,j,h,w).coeff(r,c));
}
}
VERIFY_IS_APPROX(m.middleCols(j,w), refMat.middleCols(j,w));
VERIFY_IS_APPROX(m.middleRows(i,h), refMat.middleRows(i,h));
for(Index r=0; r<h; r++)
{
VERIFY_IS_APPROX(m.middleCols(j,w).row(r), refMat.middleCols(j,w).row(r));
VERIFY_IS_APPROX(m.middleRows(i,h).row(r), refMat.middleRows(i,h).row(r));
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.col(c).coeff(r), refMat.col(c).coeff(r));
VERIFY_IS_APPROX(m.row(r).coeff(c), refMat.row(r).coeff(c));
VERIFY_IS_APPROX(m.middleCols(j,w).coeff(r,c), refMat.middleCols(j,w).coeff(r,c));
VERIFY_IS_APPROX(m.middleRows(i,h).coeff(r,c), refMat.middleRows(i,h).coeff(r,c));
if(m.middleCols(j,w).coeff(r,c) != Scalar(0))
{
VERIFY_IS_APPROX(m.middleCols(j,w).coeffRef(r,c), refMat.middleCols(j,w).coeff(r,c));
}
if(m.middleRows(i,h).coeff(r,c) != Scalar(0))
{
VERIFY_IS_APPROX(m.middleRows(i,h).coeff(r,c), refMat.middleRows(i,h).coeff(r,c));
}
}
}
for(Index c=0; c<w; c++)
{
VERIFY_IS_APPROX(m.middleCols(j,w).col(c), refMat.middleCols(j,w).col(c));
VERIFY_IS_APPROX(m.middleRows(i,h).col(c), refMat.middleRows(i,h).col(c));
}
}
for(Index c=0; c<cols; c++)
{
VERIFY_IS_APPROX(m.col(c) + m.col(c), (m + m).col(c));
VERIFY_IS_APPROX(m.col(c) + m.col(c), refMat.col(c) + refMat.col(c));
}
for(Index r=0; r<rows; r++)
{
VERIFY_IS_APPROX(m.row(r) + m.row(r), (m + m).row(r));
VERIFY_IS_APPROX(m.row(r) + m.row(r), refMat.row(r) + refMat.row(r));
}
}
// test innerVector()
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
Index j0 = internal::random<Index>(0,outer-1);
Index j1 = internal::random<Index>(0,outer-1);
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0));
else
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1));
else
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1));
SparseMatrixType m3(rows,cols);
m3.reserve(VectorXi::Constant(outer,int(inner/2)));
for(Index j=0; j<outer; ++j)
for(Index k=0; k<(std::min)(j,inner); ++k)
m3.insertByOuterInner(j,k) = k+1;
for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
m3.makeCompressed();
for(Index j=0; j<(std::min)(outer, inner); ++j)
{
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
if(j>0)
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
}
VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros());
// m2.innerVector(j0) = 2*m2.innerVector(j1);
// refMat2.col(j0) = 2*refMat2.col(j1);
// VERIFY_IS_APPROX(m2, refMat2);
}
// test innerVectors()
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
if(internal::random<float>(0,1)>0.5) m2.makeCompressed();
Index j0 = internal::random<Index>(0,outer-2);
Index j1 = internal::random<Index>(0,outer-2);
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0));
else
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
VERIFY_IS_APPROX(m2, refMat2);
VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros());
m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
if(SparseMatrixType::IsRowMajor)
refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval();
else
refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval();
VERIFY_IS_APPROX(m2, refMat2);
}
// test generic blocks
{
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
Index j0 = internal::random<Index>(0,outer-2);
Index j1 = internal::random<Index>(0,outer-2);
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols));
else
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0));
if(SparseMatrixType::IsRowMajor)
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols),
refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols));
else
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0),
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
Index i = internal::random<Index>(0,m2.outerSize()-1);
if(SparseMatrixType::IsRowMajor) {
m2.innerVector(i) = m2.innerVector(i) * s1;
refMat2.row(i) = refMat2.row(i) * s1;
VERIFY_IS_APPROX(m2,refMat2);
} else {
m2.innerVector(i) = m2.innerVector(i) * s1;
refMat2.col(i) = refMat2.col(i) * s1;
VERIFY_IS_APPROX(m2,refMat2);
}
Index r0 = internal::random<Index>(0,rows-2);
Index c0 = internal::random<Index>(0,cols-2);
Index r1 = internal::random<Index>(1,rows-r0);
Index c1 = internal::random<Index>(1,cols-c0);
VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0));
VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0));
VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0));
VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0));
VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1));
VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1));
}
}
void test_sparse_block()
{
for(int i = 0; i < g_repeat; i++) {
int r = Eigen::internal::random<int>(1,200), c = Eigen::internal::random<int>(1,200);
if(Eigen::internal::random<int>(0,4) == 0) {
r = c; // check square matrices in 25% of tries
}
EIGEN_UNUSED_VARIABLE(r+c);
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(1, 1)) ));
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(8, 8)) ));
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(r, c)) ));
CALL_SUBTEST_2(( sparse_block(SparseMatrix<std::complex<double>, ColMajor>(r, c)) ));
CALL_SUBTEST_2(( sparse_block(SparseMatrix<std::complex<double>, RowMajor>(r, c)) ));
CALL_SUBTEST_3(( sparse_block(SparseMatrix<double,ColMajor,long int>(r, c)) ));
CALL_SUBTEST_3(( sparse_block(SparseMatrix<double,RowMajor,long int>(r, c)) ));
r = Eigen::internal::random<int>(1,100);
c = Eigen::internal::random<int>(1,100);
if(Eigen::internal::random<int>(0,4) == 0) {
r = c; // check square matrices in 25% of tries
}
CALL_SUBTEST_4(( sparse_block(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
CALL_SUBTEST_4(( sparse_block(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
}
}

View File

@ -67,6 +67,9 @@ template<typename SparseMatrixType> void sparse_product()
VERIFY_IS_APPROX(m4 = m2*m3/s1, refMat4 = refMat2*refMat3/s1);
VERIFY_IS_APPROX(m4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1);
VERIFY_IS_APPROX(m4 = s2*m2*m3*s1, refMat4 = s2*refMat2*refMat3*s1);
VERIFY_IS_APPROX(m4 = (m2+m2)*m3, refMat4 = (refMat2+refMat2)*refMat3);
VERIFY_IS_APPROX(m4 = m2*m3.leftCols(cols/2), refMat4 = refMat2*refMat3.leftCols(cols/2));
VERIFY_IS_APPROX(m4 = m2*(m3+m3).leftCols(cols/2), refMat4 = refMat2*(refMat3+refMat3).leftCols(cols/2));
VERIFY_IS_APPROX(m4=(m2*m3).pruned(0), refMat4=refMat2*refMat3);
VERIFY_IS_APPROX(m4=(m2t.transpose()*m3).pruned(0), refMat4=refMat2t.transpose()*refMat3);
@ -194,7 +197,7 @@ template<typename SparseMatrixType> void sparse_product()
VERIFY_IS_APPROX(d3=d1*m2.transpose(), refM3=d1*refM2.transpose());
}
// test self-adjoint and traingular-view products
// test self-adjoint and triangular-view products
{
DenseMatrix b = DenseMatrix::Random(rows, rows);
DenseMatrix x = DenseMatrix::Random(rows, rows);

View File

@ -49,18 +49,39 @@ void svd_compare_to_full(const MatrixType& m,
unsigned int computationOptions,
const SvdType& referenceSvd)
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::RealScalar RealScalar;
Index rows = m.rows();
Index cols = m.cols();
Index diagSize = (std::min)(rows, cols);
RealScalar prec = test_precision<RealScalar>();
SvdType svd(m, computationOptions);
VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
if(computationOptions & (ComputeFullV|ComputeThinV))
{
VERIFY( (svd.matrixV().adjoint()*svd.matrixV()).isIdentity(prec) );
VERIFY_IS_APPROX( svd.matrixV().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint(),
referenceSvd.matrixV().leftCols(diagSize) * referenceSvd.singularValues().asDiagonal() * referenceSvd.matrixV().leftCols(diagSize).adjoint());
}
if(computationOptions & (ComputeFullU|ComputeThinU))
{
VERIFY( (svd.matrixU().adjoint()*svd.matrixU()).isIdentity(prec) );
VERIFY_IS_APPROX( svd.matrixU().leftCols(diagSize) * svd.singularValues().cwiseAbs2().asDiagonal() * svd.matrixU().leftCols(diagSize).adjoint(),
referenceSvd.matrixU().leftCols(diagSize) * referenceSvd.singularValues().cwiseAbs2().asDiagonal() * referenceSvd.matrixU().leftCols(diagSize).adjoint());
}
// The following checks are not critical.
// For instance, with Divide&Conquer SVD, if only the factor 'V' is computed, then a different matrix-matrix product implementation will be used
// and the resulting 'V' factor might differ significantly when the SVD decomposition is not unique, especially with single-precision float.
++g_test_level;
if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV().cwiseAbs(), referenceSvd.matrixV().cwiseAbs());
if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
--g_test_level;
}
//
@ -85,33 +106,48 @@ void svd_least_square(const MatrixType& m, unsigned int computationOptions)
SvdType svd(m, computationOptions);
if(internal::is_same<RealScalar,double>::value) svd.setThreshold(1e-8);
else if(internal::is_same<RealScalar,float>::value) svd.setThreshold(1e-4);
else if(internal::is_same<RealScalar,float>::value) svd.setThreshold(2e-4);
SolutionType x = svd.solve(rhs);
// evaluate the normal equation, which also works for least-squares solutions
if(internal::is_same<RealScalar,double>::value || svd.rank()==m.diagonal().size())
{
// This test is not stable with single precision.
// This is probably because squaring m significantly affects the precision.
VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs);
}
RealScalar residual = (m*x-rhs).norm();
// Check that there is no significantly better solution in the neighborhood of x
RealScalar rhs_norm = rhs.norm();
if(!test_isMuchSmallerThan(residual,rhs.norm()))
{
// ^^^ If the residual is very small, then we have an exact solution, so we are already good.
// evaluate the normal equation, which also works for least-squares solutions
if(internal::is_same<RealScalar,double>::value || svd.rank()==m.diagonal().size())
{
using std::sqrt;
// This test is not stable with single precision.
// This is probably because squaring m significantly affects the precision.
if(internal::is_same<RealScalar,float>::value) ++g_test_level;
VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs);
if(internal::is_same<RealScalar,float>::value) --g_test_level;
}
// Check that there is no significantly better solution in the neighborhood of x
for(Index k=0;k<x.rows();++k)
{
using std::abs;
SolutionType y(x);
y.row(k) = (1.+2*NumTraits<RealScalar>::epsilon())*x.row(k);
RealScalar residual_y = (m*y-rhs).norm();
VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y );
if(internal::is_same<RealScalar,float>::value) ++g_test_level;
VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
if(internal::is_same<RealScalar,float>::value) --g_test_level;
y.row(k) = (1.-2*NumTraits<RealScalar>::epsilon())*x.row(k);
residual_y = (m*y-rhs).norm();
VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y );
if(internal::is_same<RealScalar,float>::value) ++g_test_level;
VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
if(internal::is_same<RealScalar,float>::value) --g_test_level;
}
}
}

View File

@ -82,8 +82,10 @@ template<typename MatrixType> void swap(const MatrixType& m)
void test_swap()
{
int s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
CALL_SUBTEST_1( swap(Matrix3f()) ); // fixed size, no vectorization
CALL_SUBTEST_2( swap(Matrix4d()) ); // fixed size, possible vectorization
CALL_SUBTEST_3( swap(MatrixXd(3,3)) ); // dyn size, no vectorization
CALL_SUBTEST_4( swap(MatrixXf(30,30)) ); // dyn size, possible vectorization
CALL_SUBTEST_3( swap(MatrixXd(s,s)) ); // dyn size, no vectorization
CALL_SUBTEST_4( swap(MatrixXf(s,s)) ); // dyn size, possible vectorization
TEST_SET_BUT_UNUSED_VARIABLE(s)
}

View File

@ -9,7 +9,17 @@
#include "main.h"
typedef Matrix<float,8,1> Vector8f;
typedef Matrix<float, 6,1> Vector6f;
typedef Matrix<float, 8,1> Vector8f;
typedef Matrix<float, 12,1> Vector12f;
typedef Matrix<double, 5,1> Vector5d;
typedef Matrix<double, 6,1> Vector6d;
typedef Matrix<double, 7,1> Vector7d;
typedef Matrix<double, 8,1> Vector8d;
typedef Matrix<double, 9,1> Vector9d;
typedef Matrix<double,10,1> Vector10d;
typedef Matrix<double,12,1> Vector12d;
struct TestNew1
{
@ -81,10 +91,13 @@ void construct_at_boundary(int boundary)
void unalignedassert()
{
#if EIGEN_ALIGN_STATICALLY
#if EIGEN_ALIGN_STATICALLY
construct_at_boundary<Vector2f>(4);
construct_at_boundary<Vector3f>(4);
construct_at_boundary<Vector4f>(16);
construct_at_boundary<Vector6f>(4);
construct_at_boundary<Vector8f>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector12f>(16);
construct_at_boundary<Matrix2f>(16);
construct_at_boundary<Matrix3f>(4);
construct_at_boundary<Matrix4f>(EIGEN_ALIGN_BYTES);
@ -92,6 +105,13 @@ void unalignedassert()
construct_at_boundary<Vector2d>(16);
construct_at_boundary<Vector3d>(4);
construct_at_boundary<Vector4d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector5d>(4);
construct_at_boundary<Vector6d>(16);
construct_at_boundary<Vector7d>(4);
construct_at_boundary<Vector8d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector9d>(4);
construct_at_boundary<Vector10d>(16);
construct_at_boundary<Vector12d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Matrix2d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Matrix3d>(4);
construct_at_boundary<Matrix4d>(EIGEN_ALIGN_BYTES);
@ -100,7 +120,7 @@ void unalignedassert()
construct_at_boundary<Vector3cf>(4);
construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector3cd>(16);
#endif
#endif
check_unalignedassert_good<TestNew1>();
check_unalignedassert_good<TestNew2>();
@ -112,11 +132,19 @@ void unalignedassert()
check_unalignedassert_good<Depends<true> >();
#if EIGEN_ALIGN_STATICALLY
if(EIGEN_ALIGN_BYTES==16)
if(EIGEN_ALIGN_BYTES>=16)
{
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12f>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector6d>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8d>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector10d>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12d>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
}
for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8)
{

View File

@ -214,7 +214,7 @@ template<typename Scalar, bool Enable = internal::packet_traits<Scalar>::Vectori
>(DefaultTraversal,CompleteUnrolling)));
VERIFY((test_assign(Matrix11(), Matrix<Scalar,PacketSize,EIGEN_PLAIN_ENUM_MIN(2,PacketSize)>()*Matrix<Scalar,EIGEN_PLAIN_ENUM_MIN(2,PacketSize),PacketSize>(),
PacketSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD?DefaultTraversal:InnerVectorizedTraversal, CompleteUnrolling)));
InnerVectorizedTraversal, CompleteUnrolling)));
#endif
VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3),

View File

@ -49,8 +49,8 @@
#include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h"
@ -80,8 +80,8 @@
#include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h"
#include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h"

View File

@ -266,16 +266,16 @@ array<t, n> repeat(t v) {
}
template<std::size_t I, class Head, class Tail>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>& a) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>&) {
return get<I, type_list<Head, Tail> >::value;
}
template<std::size_t I, class Head, class Tail>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>& a) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>&) {
return get<I, type_list<Head, Tail> >::value;
}
template <class NList>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList& l) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList&) {
return arg_prod<NList>::value;
};

View File

@ -1157,7 +1157,41 @@ in TensorFunctors.h for information on how to implement a reduction operator.
## Convolutions
TBD: convolve(const KernelDerived& kernel, const Dimensions& dims)
### &lt;Operation&gt; convolve(const Kernel& kernel, const Dimensions& dims)
Returns a tensor that is the output of the convolution of the input tensor with the kernel,
computed along the specified dimensions of the input tensor. For each output dimension that
was part of the convolution, the size is reduced according to the formula:
output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size).
Dimensions that were not part of the convolution keep their original size.
Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the
convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is
for the last dimension).
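For example, in the snippet below a 3x3x7x11 input convolved with a 2x2 kernel along
dimensions 1 and 2 yields a 3x2x6x11 output, since 3 - 2 + 1 = 2 and 7 - 2 + 1 = 6: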
// Compute convolution along the second and third dimension.
Tensor<float, 4, DataLayout> input(3, 3, 7, 11);
Tensor<float, 2, DataLayout> kernel(2, 2);
Tensor<float, 4, DataLayout> output(3, 2, 6, 11);
input.setRandom();
kernel.setRandom();
Eigen::array<ptrdiff_t, 2> dims({1, 2}); // Specify second and third dimension for convolution.
output = input.convolve(kernel, dims);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 6; ++k) {
for (int l = 0; l < 11; ++l) {
const float result = output(i,j,k,l);
const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
input(i,j+1,k+0,l) * kernel(1,0) +
input(i,j+0,k+1,l) * kernel(0,1) +
input(i,j+1,k+1,l) * kernel(1,1);
VERIFY_IS_APPROX(result, expected);
}
}
}
}
## Geometrical Operations

View File

@ -520,48 +520,101 @@ class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyA
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorLayoutSwapOp<Derived>
const TensorLayoutSwapOp<const Derived>
swap_layout() const {
return TensorLayoutSwapOp<const Derived>(derived());
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorLayoutSwapOp<Derived>
swap_layout() {
return TensorLayoutSwapOp<Derived>(derived());
}
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorConcatenationOp<const Axis, const Derived, const OtherDerived>
concatenate(const OtherDerived& other, const Axis& axis) const {
return TensorConcatenationOp<const Axis, const Derived, const OtherDerived>(derived(), other, axis);
}
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorConcatenationOp<const Axis, Derived, OtherDerived>
concatenate(const OtherDerived& other, const Axis& axis) const {
return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other.derived(), axis);
concatenate(const OtherDerived& other, const Axis& axis) {
return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other, axis);
}
template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReshapingOp<const NewDimensions, const Derived>
reshape(const NewDimensions& newDimensions) const {
return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions);
}
template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorReshapingOp<const NewDimensions, Derived>
reshape(const NewDimensions& newDimensions) const {
reshape(const NewDimensions& newDimensions) {
return TensorReshapingOp<const NewDimensions, Derived>(derived(), newDimensions);
}
template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorSlicingOp<const StartIndices, const Sizes, const Derived>
slice(const StartIndices& startIndices, const Sizes& sizes) const {
return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes);
}
template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorSlicingOp<const StartIndices, const Sizes, Derived>
slice(const StartIndices& startIndices, const Sizes& sizes) const {
slice(const StartIndices& startIndices, const Sizes& sizes) {
return TensorSlicingOp<const StartIndices, const Sizes, Derived>(derived(), startIndices, sizes);
}
template <DenseIndex DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorChippingOp<DimId, Derived>
const TensorChippingOp<DimId, const Derived>
chip(const Index offset) const {
return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId);
}
template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorChippingOp<DimId, Derived>
chip(const Index offset) {
return TensorChippingOp<DimId, Derived>(derived(), offset, DimId);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorChippingOp<Dynamic, const Derived>
chip(const Index offset, const Index dim) const {
return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorChippingOp<Dynamic, Derived>
chip(const Index offset, const Index dim) const {
chip(const Index offset, const Index dim) {
return TensorChippingOp<Dynamic, Derived>(derived(), offset, dim);
}
template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReverseOp<const ReverseDimensions, const Derived>
reverse(const ReverseDimensions& rev) const {
return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev);
}
template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorReverseOp<const ReverseDimensions, Derived>
reverse(const ReverseDimensions& rev) const {
reverse(const ReverseDimensions& rev) {
return TensorReverseOp<const ReverseDimensions, Derived>(derived(), rev);
}
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorShufflingOp<const Shuffle, const Derived>
shuffle(const Shuffle& shuffle) const {
return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
}
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorShufflingOp<const Shuffle, Derived>
shuffle(const Shuffle& shuffle) const {
shuffle(const Shuffle& shuffle) {
return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
}
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorStridingOp<const Strides, const Derived>
stride(const Strides& strides) const {
return TensorStridingOp<const Strides, const Derived>(derived(), strides);
}
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorStridingOp<const Strides, Derived>
stride(const Strides& strides) const {
stride(const Strides& strides) {
return TensorStridingOp<const Strides, Derived>(derived(), strides);
}

View File

@ -157,6 +157,8 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
eigen_assert(NumInputDims > m_dim.actualDim());
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);
int j = 0;
for (int i = 0; i < NumInputDims; ++i) {
if (i != m_dim.actualDim()) {
@ -246,7 +248,9 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
Scalar* result = m_impl.data();
if (m_dim.actualDim() == NumDims && result) {
if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
(static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
result) {
return result + m_inputOffset;
} else {
return NULL;

View File

@ -21,8 +21,8 @@ namespace Eigen {
*/
namespace internal {
template <typename Index, typename InputDims, size_t NumKernelDims> class IndexMapper {
template <typename Index, typename InputDims, size_t NumKernelDims, int Layout>
class IndexMapper {
public:
IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
const array<Index, NumKernelDims>& indices) {
@ -38,13 +38,19 @@ template <typename Index, typename InputDims, size_t NumKernelDims> class IndexM
array<Index, NumDims> inputStrides;
array<Index, NumDims> outputStrides;
for (int i = 0; i < NumDims; ++i) {
if (i > 0) {
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
inputStrides[0] = 1;
outputStrides[0] = 1;
for (int i = 1; i < NumDims; ++i) {
inputStrides[i] = inputStrides[i-1] * input_dims[i-1];
outputStrides[i] = outputStrides[i-1] * dimensions[i-1];
} else {
inputStrides[0] = 1;
outputStrides[0] = 1;
}
} else {
inputStrides[NumDims - 1] = 1;
outputStrides[NumDims - 1] = 1;
for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) {
inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1];
}
}
@ -52,13 +58,20 @@ template <typename Index, typename InputDims, size_t NumKernelDims> class IndexM
array<Index, NumDims> cudaOutputDimensions;
array<Index, NumDims> tmp = dimensions;
array<Index, NumDims> ordering;
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
? 0
: NumDims - NumKernelDims;
for (int i = 0; i < NumKernelDims; ++i) {
ordering[i] = indices[i];
const Index index = i + offset;
ordering[index] = indices[i];
tmp[indices[i]] = -1;
cudaInputDimensions[i] = input_dims[ordering[i]];
cudaOutputDimensions[i] = dimensions[ordering[i]];
cudaInputDimensions[index] = input_dims[indices[i]];
cudaOutputDimensions[index] = dimensions[indices[i]];
}
int written = NumKernelDims;
int written = static_cast<int>(Layout) == static_cast<int>(ColMajor)
? NumKernelDims
: 0;
for (int i = 0; i < NumDims; ++i) {
if (tmp[i] >= 0) {
ordering[written] = i;
@ -73,61 +86,123 @@ template <typename Index, typename InputDims, size_t NumKernelDims> class IndexM
m_outputStrides[i] = outputStrides[ordering[i]];
}
for (int i = 0; i < NumDims; ++i) {
if (i > NumKernelDims) {
m_cudaInputStrides[i] = m_cudaInputStrides[i-1] * cudaInputDimensions[i-1];
m_cudaOutputStrides[i] = m_cudaOutputStrides[i-1] * cudaOutputDimensions[i-1];
} else {
m_cudaInputStrides[i] = 1;
m_cudaOutputStrides[i] = 1;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = 0; i < NumDims; ++i) {
if (i > NumKernelDims) {
m_cudaInputStrides[i] =
m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1];
m_cudaOutputStrides[i] =
m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1];
} else {
m_cudaInputStrides[i] = 1;
m_cudaOutputStrides[i] = 1;
}
}
} else {
for (int i = NumDims - 1; i >= 0; --i) {
if (i + 1 < offset) {
m_cudaInputStrides[i] =
m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1];
m_cudaOutputStrides[i] =
m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1];
} else {
m_cudaInputStrides[i] = 1;
m_cudaOutputStrides[i] = 1;
}
}
}
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const {
Index inputIndex = 0;
for (int d = NumDims - 1; d > NumKernelDims; --d) {
const Index idx = p / m_cudaInputStrides[d];
inputIndex += idx * m_inputStrides[d];
p -= idx * m_cudaInputStrides[d];
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int d = NumDims - 1; d > NumKernelDims; --d) {
const Index idx = p / m_cudaInputStrides[d];
inputIndex += idx * m_inputStrides[d];
p -= idx * m_cudaInputStrides[d];
}
inputIndex += p * m_inputStrides[NumKernelDims];
} else {
int limit = 0;
if (NumKernelDims < NumDims) {
limit = NumDims - NumKernelDims - 1;
}
for (int d = 0; d < limit; ++d) {
const Index idx = p / m_cudaInputStrides[d];
inputIndex += idx * m_inputStrides[d];
p -= idx * m_cudaInputStrides[d];
}
inputIndex += p * m_inputStrides[limit];
}
inputIndex += p * m_inputStrides[NumKernelDims];
return inputIndex;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const {
Index outputIndex = 0;
for (int d = NumDims - 1; d > NumKernelDims; --d) {
const Index idx = p / m_cudaOutputStrides[d];
outputIndex += idx * m_outputStrides[d];
p -= idx * m_cudaOutputStrides[d];
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int d = NumDims - 1; d > NumKernelDims; --d) {
const Index idx = p / m_cudaOutputStrides[d];
outputIndex += idx * m_outputStrides[d];
p -= idx * m_cudaOutputStrides[d];
}
outputIndex += p * m_outputStrides[NumKernelDims];
} else {
int limit = 0;
if (NumKernelDims < NumDims) {
limit = NumDims - NumKernelDims - 1;
}
for (int d = 0; d < limit; ++d) {
const Index idx = p / m_cudaOutputStrides[d];
outputIndex += idx * m_outputStrides[d];
p -= idx * m_cudaOutputStrides[d];
}
outputIndex += p * m_outputStrides[limit];
}
outputIndex += p * m_outputStrides[NumKernelDims];
return outputIndex;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const {
return i * m_inputStrides[0];
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
? 0
: NumDims - NumKernelDims;
return i * m_inputStrides[offset];
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const {
return i * m_outputStrides[0];
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
? 0
: NumDims - NumKernelDims;
return i * m_outputStrides[offset];
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const {
return i * m_inputStrides[0] + j*m_inputStrides[1];
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
? 0
: NumDims - NumKernelDims;
return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1];
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const {
return i * m_outputStrides[0] + j * m_outputStrides[1];
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
? 0
: NumDims - NumKernelDims;
return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1];
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const {
return i * m_inputStrides[0] + j*m_inputStrides[1] + k*m_inputStrides[2];
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
? 0
: NumDims - NumKernelDims;
return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] +
k * m_inputStrides[offset + 2];
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const {
return i * m_outputStrides[0] + j*m_outputStrides[1] + k*m_outputStrides[2];
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
? 0
: NumDims - NumKernelDims;
return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] +
k * m_outputStrides[offset + 2];
}
private:
@ -237,35 +312,61 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
: m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
{
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
// Only column major tensors are supported for now.
EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE);
const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions();
const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
m_inputStride[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_inputStride[i] = m_inputStride[i-1] * input_dims[i-1];
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
m_inputStride[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1];
}
} else {
m_inputStride[NumDims - 1] = 1;
for (int i = NumDims - 2; i >= 0; --i) {
m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1];
}
}
m_dimensions = m_inputImpl.dimensions();
for (int i = 0; i < NumKernelDims; ++i) {
const Index index = op.indices()[i];
const Index input_dim = input_dims[index];
const Index kernel_dim = kernel_dims[i];
const Index result_dim = input_dim - kernel_dim + 1;
m_dimensions[index] = result_dim;
if (i > 0) {
m_kernelStride[i] = m_kernelStride[i-1] * kernel_dims[i-1];
} else {
m_kernelStride[0] = 1;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = 0; i < NumKernelDims; ++i) {
const Index index = op.indices()[i];
const Index input_dim = input_dims[index];
const Index kernel_dim = kernel_dims[i];
const Index result_dim = input_dim - kernel_dim + 1;
m_dimensions[index] = result_dim;
if (i > 0) {
m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1];
} else {
m_kernelStride[0] = 1;
}
m_indexStride[i] = m_inputStride[index];
}
m_indexStride[i] = m_inputStride[index];
}
m_outputStride[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_outputStride[i] = m_outputStride[i-1] * m_dimensions[i-1];
m_outputStride[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1];
}
} else {
for (int i = NumKernelDims - 1; i >= 0; --i) {
const Index index = op.indices()[i];
const Index input_dim = input_dims[index];
const Index kernel_dim = kernel_dims[i];
const Index result_dim = input_dim - kernel_dim + 1;
m_dimensions[index] = result_dim;
if (i < NumKernelDims - 1) {
m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1];
} else {
m_kernelStride[NumKernelDims - 1] = 1;
}
m_indexStride[i] = m_inputStride[index];
}
m_outputStride[NumDims - 1] = 1;
for (int i = NumDims - 2; i >= 0; --i) {
m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1];
}
}
}
@ -310,13 +411,24 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
Index indices[2] = {index, index+PacketSize-1};
Index startInputs[2] = {0, 0};
for (int i = NumDims - 1; i > 0; --i) {
const Index idx0 = indices[0] / m_outputStride[i];
const Index idx1 = indices[1] / m_outputStride[i];
startInputs[0] += idx0 * m_inputStride[i];
startInputs[1] += idx1 * m_inputStride[i];
indices[0] -= idx0 * m_outputStride[i];
indices[1] -= idx1 * m_outputStride[i];
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = NumDims - 1; i > 0; --i) {
const Index idx0 = indices[0] / m_outputStride[i];
const Index idx1 = indices[1] / m_outputStride[i];
startInputs[0] += idx0 * m_inputStride[i];
startInputs[1] += idx1 * m_inputStride[i];
indices[0] -= idx0 * m_outputStride[i];
indices[1] -= idx1 * m_outputStride[i];
}
} else {
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx0 = indices[0] / m_outputStride[i];
const Index idx1 = indices[1] / m_outputStride[i];
startInputs[0] += idx0 * m_inputStride[i];
startInputs[1] += idx1 * m_inputStride[i];
indices[0] -= idx0 * m_outputStride[i];
indices[1] -= idx1 * m_outputStride[i];
}
}
startInputs[0] += indices[0];
startInputs[1] += indices[1];
@ -344,10 +456,18 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
private:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
Index startInput = 0;
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStride[i];
startInput += idx * m_inputStride[i];
index -= idx * m_outputStride[i];
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStride[i];
startInput += idx * m_inputStride[i];
index -= idx * m_outputStride[i];
}
} else {
for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = index / m_outputStride[i];
startInput += idx * m_inputStride[i];
index -= idx * m_outputStride[i];
}
}
startInput += index;
return startInput;
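To see the index decomposition at work: with col-major dims {2,3,5} the output strides are {1,2,6}, so firstInput(17) peels off 17/6 = 2 (remainder 5), then 5/2 = 2 (remainder 1), recovering coordinates (1,2,2) while accumulating the matching input strides. A tiny illustrative check (not Eigen API):

#include <cassert>

int main() {
  const int out_stride[3] = {1, 2, 6};  // col-major strides for dims {2,3,5}
  int index = 17, coord[3];
  for (int i = 2; i > 0; --i) {         // same loop shape as firstInput()
    coord[i] = index / out_stride[i];
    index -= coord[i] * out_stride[i];
  }
  coord[0] = index;
  assert(coord[0] == 1 && coord[1] == 2 && coord[2] == 2);
}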
@ -378,7 +498,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
}
}
EIGEN_STRONG_INLINE void preloadKernel() {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() {
// Don't make a local copy of the kernel unless we have to (i.e. it's an
// expression that needs to be evaluated)
const Scalar* in_place = m_kernelImpl.data();
@ -431,11 +551,14 @@ struct GetKernelSize<Dynamic> {
}
};
template <typename InputEvaluator, typename Index, typename InputDims, int StaticKernelSize>
__global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::IndexMapper<Index, InputDims, 1> indexMapper, const float* __restrict kernel, const int numPlanes, const int numX, const int maxX, const int kernelSize, float* buffer) {
template <typename InputEvaluator, typename Index, typename InputDims,
int StaticKernelSize>
__global__ void EigenConvolutionKernel1D(
InputEvaluator eval,
const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout>
indexMapper,
const float* __restrict kernel, const int numPlanes, const int numX,
const int maxX, const int kernelSize, float* buffer) {
extern __shared__ float s[];
const int first_x = blockIdx.x * maxX;
@ -453,7 +576,7 @@ __global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::In
#pragma unroll
for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x);
s[i + plane_kernel_offset] = eval.coeff(tensor_index);
s[i + plane_kernel_offset] = eval.coeff(tensor_index);
}
__syncthreads();
@ -476,9 +599,15 @@ __global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::In
}
};
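The 1D kernel above stages one input window per plane into shared memory, synchronizes, then accumulates. The same tiling pattern in a minimal standalone CUDA sketch (the kernel name, flat input layout, and launch contract are assumptions for illustration, not Eigen's API; launch with shared_mem = (threads + kernel_size - 1) * sizeof(float)):

__global__ void conv1d_tile(const float* __restrict in, const float* __restrict kernel,
                            float* __restrict out, int num_out, int kernel_size) {
  extern __shared__ float s[];
  const int tile_out = blockDim.x;                 // outputs computed per block
  const int first = blockIdx.x * tile_out;         // first output of this tile
  const int tile_in = tile_out + kernel_size - 1;  // inputs the tile needs

  // cooperative load of the input window into shared memory
  for (int i = threadIdx.x; i < tile_in; i += blockDim.x) {
    const int src = first + i;
    s[i] = (src < num_out + kernel_size - 1) ? in[src] : 0.0f;
  }
  __syncthreads();

  const int x = first + threadIdx.x;
  if (x < num_out) {
    float acc = 0.0f;
    for (int k = 0; k < kernel_size; ++k) acc += s[threadIdx.x + k] * kernel[k];
    out[x] = acc;
  }
}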
template <typename InputEvaluator, typename Index, typename InputDims, int StaticKernelSizeX, int StaticKernelSizeY>
__global__ void EigenConvolutionKernel2D(InputEvaluator eval, const internal::IndexMapper<Index, InputDims, 2> indexMapper, const float* __restrict kernel, const int numPlanes, const int numX, const int maxX, const int numY, const int maxY, const int kernelSizeX, const int kernelSizeY, float* buffer) {
template <typename InputEvaluator, typename Index, typename InputDims,
int StaticKernelSizeX, int StaticKernelSizeY>
__global__ void EigenConvolutionKernel2D(
InputEvaluator eval,
const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout>
indexMapper,
const float* __restrict kernel, const int numPlanes, const int numX,
const int maxX, const int numY, const int maxY, const int kernelSizeX,
const int kernelSizeY, float* buffer) {
extern __shared__ float s[];
const int first_x = blockIdx.x * maxX;
@ -538,9 +667,15 @@ __global__ void EigenConvolutionKernel2D(InputEvaluator eval, const internal::In
}
};
template <typename InputEvaluator, typename Index, typename InputDims>
__global__ void EigenConvolutionKernel3D(InputEvaluator eval, const internal::IndexMapper<Index, InputDims, 3> indexMapper, const float* __restrict kernel, const size_t numPlanes, const size_t numX, const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, const size_t kernelSizeZ, float* buffer) {
__global__ void EigenConvolutionKernel3D(
InputEvaluator eval,
const internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout>
indexMapper,
const float* __restrict kernel, const size_t numPlanes, const size_t numX,
const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ,
const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY,
const size_t kernelSizeZ, float* buffer) {
extern __shared__ float s[];
// Load inputs to shared memory
@ -622,8 +757,6 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
: m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
{
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
// Only column major tensors are supported for now.
EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE);
const typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions();
const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
@ -712,10 +845,14 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
const int numX = dimensions()[m_indices[0]];
const int numP = dimensions().TotalSize() / numX;
int maxX;
dim3 block_size;
if (m_indices[0] == 0) {
const int single_stride_dim =
static_cast<int>(Layout) == static_cast<int>(ColMajor)
? 0
: m_inputImpl.dimensions().rank() - 1;
if (m_indices[0] == single_stride_dim) {
// Maximize the reuse
const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32;
maxX = (std::min<int>)(inner_dim, numX);
@ -747,7 +884,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
const array<Index, 1> indices(m_indices[0]);
const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
internal::IndexMapper<Index, InputDims, 1> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(
m_inputImpl.dimensions(), kernel_dims, indices);
switch(kernel_size) {
case 4: {
LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data);
@ -765,11 +903,15 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
}
case 2: {
const int kernel_size_x = m_kernelImpl.dimensions()[0];
const int kernel_size_y = m_kernelImpl.dimensions()[1];
const int idxX =
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1;
const int idxY =
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0;
const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
const int numX = dimensions()[m_indices[0]];
const int numY = dimensions()[m_indices[1]];
const int numX = dimensions()[m_indices[idxX]];
const int numY = dimensions()[m_indices[idxY]];
const int numP = dimensions().TotalSize() / (numX*numY);
const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x));
@ -798,9 +940,11 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
//cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
const array<Index, 2> indices(m_indices[0], m_indices[1]);
const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[0], m_kernelImpl.dimensions()[1]);
internal::IndexMapper<Index, InputDims, 2> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX],
m_kernelImpl.dimensions()[idxY]);
internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(
m_inputImpl.dimensions(), kernel_dims, indices);
switch (kernel_size_x) {
case 4: {
switch (kernel_size_y) {
@ -837,13 +981,20 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
}
case 3: {
const int kernel_size_x = m_kernelImpl.dimensions()[0];
const int kernel_size_y = m_kernelImpl.dimensions()[1];
const int kernel_size_z = m_kernelImpl.dimensions()[2];
const int idxX =
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2;
const int idxY =
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1;
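// note: with three kernel dimensions the middle one maps to index 1 in
// either layout, so "? 1 : 1" above is intentional rather than a typo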
const int idxZ =
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0;
const int numX = dimensions()[m_indices[0]];
const int numY = dimensions()[m_indices[1]];
const int numZ = dimensions()[m_indices[2]];
const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
const int kernel_size_z = m_kernelImpl.dimensions()[idxZ];
const int numX = dimensions()[m_indices[idxX]];
const int numY = dimensions()[m_indices[idxY]];
const int numZ = dimensions()[m_indices[idxZ]];
const int numP = dimensions().TotalSize() / (numX*numY*numZ);
const int maxX = (std::min<int>)(128, (std::min<int>)(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX));
@ -860,16 +1011,20 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
assert(shared_mem <= maxSharedMem);
//cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
const array<Index, 3> indices(m_indices[0], m_indices[1], m_indices[2]);
const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[0], m_kernelImpl.dimensions()[1], m_kernelImpl.dimensions()[2]);
internal::IndexMapper<Index, InputDims, 3> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
const array<Index, 3> indices(m_indices[idxX], m_indices[idxY],
m_indices[idxZ]);
const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX],
m_kernelImpl.dimensions()[idxY],
m_kernelImpl.dimensions()[idxZ]);
internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(
m_inputImpl.dimensions(), kernel_dims, indices);
LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data);
break;
}
default: {
assert(false && "not supported yet");
EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE);
}
}
}


@ -21,8 +21,7 @@ namespace Eigen {
* Example:
* C.device(EIGEN_GPU) = A + B;
*
* Todo: thread pools.
* Todo: operator +=, -=, *= and so on.
* Todo: operator *= and /=.
*/
template <typename ExpressionType, typename DeviceType> class TensorDevice {
@ -33,8 +32,7 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
Assign assign(m_expression, other);
static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
return *this;
}
@ -45,8 +43,18 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
Sum sum(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
Assign assign(m_expression, sum);
static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
return *this;
}
template<typename OtherDerived>
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
typedef typename OtherDerived::Scalar Scalar;
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
Difference difference(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
Assign assign(m_expression, difference);
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
return *this;
}
@ -65,8 +73,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
Assign assign(m_expression, other);
static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
return *this;
}
@ -77,8 +84,18 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
Sum sum(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
Assign assign(m_expression, sum);
static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
return *this;
}
template<typename OtherDerived>
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
typedef typename OtherDerived::Scalar Scalar;
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
Difference difference(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
Assign assign(m_expression, difference);
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
return *this;
}
@ -99,7 +116,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
Assign assign(m_expression, other);
internal::TensorExecutor<const Assign, GpuDevice, false>::run(assign, m_device);
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
return *this;
}
@ -110,13 +127,24 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
Sum sum(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
Assign assign(m_expression, sum);
internal::TensorExecutor<const Assign, GpuDevice, false>::run(assign, m_device);
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
return *this;
}
template<typename OtherDerived>
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
typedef typename OtherDerived::Scalar Scalar;
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
Difference difference(m_expression, other);
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
Assign assign(m_expression, difference);
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
return *this;
}
protected:
const GpuDevice& m_device;
ExpressionType m_expression;
ExpressionType& m_expression;
};
#endif
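With operator-= added above, device-bound expressions gain subtract-assign through the same fused-assign path as operator= and operator+=; note that TensorExecutor now decides vectorization internally, so the explicit Vectorize flag disappears from every call site. A minimal usage sketch on the default device:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> a(3, 4), b(3, 4);
  a.setRandom();
  b.setRandom();
  Eigen::DefaultDevice device;
  a.device(device) += b;  // one fused TensorAssignOp run by TensorExecutor
  a.device(device) -= b;  // same mechanism, now via scalar_difference_op
}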


@ -145,39 +145,39 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
Sizes() { }
template <typename DenseIndex>
explicit Sizes(const array<DenseIndex, Base::count>& indices) {
explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
// todo: add assertion
}
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> Sizes(DenseIndex... indices) { }
explicit Sizes(std::initializer_list<std::size_t> l) {
template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
explicit Sizes(std::initializer_list<std::size_t>) {
// todo: add assertion
}
#else
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0) {
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) {
}
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1) {
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
}
#endif
template <typename T> Sizes& operator = (const T& other) {
template <typename T> Sizes& operator = (const T&) {
// todo: check the size of other
return *this;
}
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count - 1, false>::run(indices, *static_cast<const Base*>(this);
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count - 1, false>::run(indices, *static_cast<const Base*>(this));
}
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count - 1, true>::run(indices, *static_cast<const Base*>(this);
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count - 1, true>::run(indices, *static_cast<const Base*>(this));
}
};
@ -343,7 +343,7 @@ template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::s
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
};
template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>& a) {
template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) {
return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value;
};
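The linearization helpers fixed above (each was missing a closing parenthesis) map a coordinate array to a flat offset. An illustrative check with the fixed-size Sizes, assuming the C++11 tensor module:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Sizes<2, 3, 5> s;
  Eigen::array<Eigen::DenseIndex, 3> idx{{1, 2, 2}};
  // col-major: 1*1 + 2*2 + 2*6 == 17; row-major: 1*15 + 2*5 + 2*1 == 27
  assert(s.IndexOfColMajor(idx) == 17);
  assert(s.IndexOfRowMajor(idx) == 27);
}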


@ -352,11 +352,12 @@ template<typename IfArgType, typename ThenArgType, typename ElseArgType, typenam
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
typedef typename XprType::Scalar Scalar;
enum {
IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess/* &
TensorEvaluator<IfArgType>::PacketAccess*/,
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
internal::packet_traits<Scalar>::HasBlend,
Layout = TensorEvaluator<IfArgType, Device>::Layout,
CoordAccess = false, // to be implemented
};
@ -373,7 +374,6 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
}
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename internal::traits<XprType>::Packet PacketReturnType;
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
@ -403,7 +403,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
template<int LoadMode>
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
{
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
internal::Selector<PacketSize> select;
for (Index i = 0; i < PacketSize; ++i) {
select.select[i] = m_condImpl.coeff(index+i);
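Gating PacketAccess on packet_traits<Scalar>::HasBlend means the vectorized path is only compiled when the scalar's packet type can actually blend on the target ISA; the user-facing op is unchanged. A minimal usage sketch:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> a(2, 3), b(2, 3), c(2, 3);
  a.setRandom();
  b.setRandom();
  c.setRandom();
  // TensorSelectOp: picks b where the condition holds, c elsewhere
  Eigen::Tensor<float, 2> r = (a > a.constant(0.0f)).select(b, c);
}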


@ -77,7 +77,7 @@ template <typename T> struct MeanReducer
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * packet_traits<Packet>::size);
return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
}
protected:
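
This fix matters because Packet here is already a packet type: packet_traits is keyed on scalar types, whereas unpacket_traits<Packet>::size reports the lane count of the packet itself. A small illustration (prints 1 when vectorization is disabled):

#include <Eigen/Core>
#include <iostream>

int main() {
  typedef Eigen::internal::packet_traits<float>::type FloatPacket;
  std::cout << Eigen::internal::unpacket_traits<FloatPacket>::size << "\n";
}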


@ -54,7 +54,7 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_> >
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | LvalueBit,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit),
};
};
@ -69,7 +69,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_> >
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | LvalueBit,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit),
};
};
@ -86,7 +86,7 @@ struct traits<TensorMap<PlainObjectType, Options_> >
static const int Layout = BaseTraits::Layout;
enum {
Options = Options_,
Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
};
};
@ -102,7 +102,7 @@ struct traits<TensorRef<PlainObjectType> >
static const int Layout = BaseTraits::Layout;
enum {
Options = BaseTraits::Options,
Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
};
};
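The net effect of tying LvalueBit to non-const scalars (and of no longer forcing it for maps and refs): tensor expressions over const data stop advertising assignability. A sketch of the intended behavior, assuming the traits above:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  float data[6] = {0, 1, 2, 3, 4, 5};
  Eigen::TensorMap<Eigen::Tensor<const float, 2> > m(data, 2, 3);
  float x = m(1, 2);  // reads compile and work as before
  // m(1, 2) = 0.0f;  // should no longer compile: no LvalueBit for const Scalar
  (void)x;
}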

View File

@ -361,7 +361,6 @@ struct evaluator<DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> >
: evaluator_base<DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> >
{
typedef _Scalar Scalar;
typedef _StorageIndex Index;
typedef DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> SparseMatrixType;
typedef typename SparseMatrixType::InnerIterator InnerIterator;
typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator;
@ -378,6 +377,8 @@ struct evaluator<DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> >
operator const SparseMatrixType&() const { return *m_matrix; }
Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); }
Index nonZerosEstimate() const { return m_matrix->nonZeros(); }
const SparseMatrixType *m_matrix;
};


@ -18,7 +18,7 @@ namespace Eigen {
namespace internal
{
template <typename Scalar>
inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, Scalar& value)
inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, Scalar& value)
{
line >> i >> j >> value;
i--;
@ -31,7 +31,7 @@ namespace internal
return false;
}
template <typename Scalar>
inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, std::complex<Scalar>& value)
inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, std::complex<Scalar>& value)
{
Scalar valR, valI;
line >> i >> j >> valR >> valI;


@ -340,11 +340,9 @@ static void test_chip_as_lvalue()
}
}
template<int DataLayout>
static void test_chip_raw_data()
static void test_chip_raw_data_col_major()
{
Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
Tensor<float, 5, ColMajor> tensor(2,3,5,7,11);
tensor.setRandom();
typedef TensorEvaluator<decltype(tensor.template chip<4>(3)), DefaultDevice> Evaluator4;
@ -353,12 +351,7 @@ static void test_chip_raw_data()
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
int chip_index;
if (DataLayout == ColMajor) {
chip_index = i + 2 * (j + 3 * (k + 5 * l));
} else {
chip_index = 11 * (l + 7 * (k + 5 * (j + 3 * i)));
}
int chip_index = i + 2 * (j + 3 * (k + 5 * l));
VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(i,j,k,l,3));
}
}
@ -382,6 +375,41 @@ static void test_chip_raw_data()
VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
}
static void test_chip_raw_data_row_major()
{
Tensor<float, 5, RowMajor> tensor(11,7,5,3,2);
tensor.setRandom();
typedef TensorEvaluator<decltype(tensor.template chip<0>(3)), DefaultDevice> Evaluator0;
auto chip = Evaluator0(tensor.template chip<0>(3), DefaultDevice());
for (int i = 0; i < 7; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 2; ++l) {
int chip_index = l + 2 * (k + 3 * (j + 5 * i));
VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(3,i,j,k,l));
}
}
}
}
typedef TensorEvaluator<decltype(tensor.template chip<1>(0)), DefaultDevice> Evaluator1;
auto chip1 = Evaluator1(tensor.template chip<1>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.template chip<2>(0)), DefaultDevice> Evaluator2;
auto chip2 = Evaluator2(tensor.template chip<2>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.template chip<3>(0)), DefaultDevice> Evaluator3;
auto chip3 = Evaluator3(tensor.template chip<3>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
typedef TensorEvaluator<decltype(tensor.template chip<4>(0)), DefaultDevice> Evaluator4;
auto chip4 = Evaluator4(tensor.template chip<4>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip4.data(), static_cast<float*>(0));
}
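The expected offset follows directly from row-major strides: the chipped shape (7, 5, 3, 2) has strides (30, 6, 2, 1), so chip_index = l + 2*(k + 3*(j + 5*i)) = 30*i + 6*j + 2*k + l, which is exactly what the loop above verifies.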
void test_cxx11_tensor_chipping()
{
CALL_SUBTEST(test_simple_chip<ColMajor>());
@ -392,6 +420,6 @@ void test_cxx11_tensor_chipping()
CALL_SUBTEST(test_chip_in_expr<RowMajor>());
CALL_SUBTEST(test_chip_as_lvalue<ColMajor>());
CALL_SUBTEST(test_chip_as_lvalue<RowMajor>());
CALL_SUBTEST(test_chip_raw_data<ColMajor>());
CALL_SUBTEST(test_chip_raw_data<RowMajor>());
CALL_SUBTEST(test_chip_raw_data_col_major());
CALL_SUBTEST(test_chip_raw_data_row_major());
}


@ -13,8 +13,6 @@
using Eigen::Tensor;
static void test_simple_assign()
{
Tensor<int, 3> random(2,3,7);
@ -33,7 +31,32 @@ static void test_simple_assign()
}
}
static void test_assign_of_const_tensor()
{
Tensor<int, 3> random(2,3,7);
random.setRandom();
TensorMap<Tensor<const int, 3> > constant1(random.data(), 2, 3, 7);
TensorMap<const Tensor<int, 3> > constant2(random.data(), 2, 3, 7);
const TensorMap<Tensor<int, 3> > constant3(random.data(), 2, 3, 7);
Tensor<int, 2> result1 = constant1.chip(0, 2);
Tensor<int, 2> result2 = constant2.chip(0, 2);
Tensor<int, 2> result3 = constant3.chip(0, 2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL((result1(i,j)), random(i,j,0));
VERIFY_IS_EQUAL((result2(i,j)), random(i,j,0));
VERIFY_IS_EQUAL((result3(i,j)), random(i,j,0));
}
}
}
void test_cxx11_tensor_const()
{
CALL_SUBTEST(test_simple_assign());
CALL_SUBTEST(test_assign_of_const_tensor());
}


@ -14,15 +14,16 @@
using Eigen::Tensor;
using Eigen::DefaultDevice;
template <int DataLayout>
static void test_evals()
{
Tensor<float, 2> input(3, 3);
Tensor<float, 1> kernel(2);
Tensor<float, 2, DataLayout> input(3, 3);
Tensor<float, 1, DataLayout> kernel(2);
input.setRandom();
kernel.setRandom();
Tensor<float, 2> result(2,3);
Tensor<float, 2, DataLayout> result(2,3);
result.setZero();
Eigen::array<Tensor<float, 2>::Index, 1> dims3({0});
@ -41,15 +42,15 @@ static void test_evals()
VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5
}
template <int DataLayout>
static void test_expr()
{
Tensor<float, 2> input(3, 3);
Tensor<float, 2> kernel(2, 2);
Tensor<float, 2, DataLayout> input(3, 3);
Tensor<float, 2, DataLayout> kernel(2, 2);
input.setRandom();
kernel.setRandom();
Tensor<float, 2> result(2,2);
Tensor<float, 2, DataLayout> result(2,2);
Eigen::array<ptrdiff_t, 2> dims({0, 1});
result = input.convolve(kernel, dims);
@ -63,10 +64,10 @@ static void test_expr()
input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
}
template <int DataLayout>
static void test_modes() {
Tensor<float, 1> input(3);
Tensor<float, 1> kernel(3);
Tensor<float, 1, DataLayout> input(3);
Tensor<float, 1, DataLayout> kernel(3);
input(0) = 1.0f;
input(1) = 2.0f;
input(2) = 3.0f;
@ -74,13 +75,13 @@ static void test_modes() {
kernel(1) = 1.0f;
kernel(2) = 0.0f;
const Eigen::array<ptrdiff_t, 1> dims{{0}};
const Eigen::array<ptrdiff_t, 1> dims({0});
Eigen::array<std::pair<ptrdiff_t, ptrdiff_t>, 1> padding;
// Emulate VALID mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(0, 0);
Tensor<float, 1> valid(1);
Tensor<float, 1, DataLayout> valid(1);
valid = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(valid.dimension(0), 1);
VERIFY_IS_APPROX(valid(0), 2.5f);
@ -88,7 +89,7 @@ static void test_modes() {
// Emulate SAME mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(1, 1);
Tensor<float, 1> same(3);
Tensor<float, 1, DataLayout> same(3);
same = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(same.dimension(0), 3);
VERIFY_IS_APPROX(same(0), 1.0f);
@ -98,7 +99,7 @@ static void test_modes() {
// Emulate FULL mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(2, 2);
Tensor<float, 1> full(5);
Tensor<float, 1, DataLayout> full(5);
full = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(full.dimension(0), 5);
VERIFY_IS_APPROX(full(0), 0.0f);
@ -108,18 +109,18 @@ static void test_modes() {
VERIFY_IS_APPROX(full(4), 1.5f);
}
template <int DataLayout>
static void test_strides() {
Tensor<float, 1> input(13);
Tensor<float, 1> kernel(3);
Tensor<float, 1, DataLayout> input(13);
Tensor<float, 1, DataLayout> kernel(3);
input.setRandom();
kernel.setRandom();
const Eigen::array<ptrdiff_t, 1> dims{{0}};
const Eigen::array<ptrdiff_t, 1> stride_of_3{{3}};
const Eigen::array<ptrdiff_t, 1> stride_of_2{{2}};
const Eigen::array<ptrdiff_t, 1> dims({0});
const Eigen::array<ptrdiff_t, 1> stride_of_3({3});
const Eigen::array<ptrdiff_t, 1> stride_of_2({2});
Tensor<float, 1> result;
Tensor<float, 1, DataLayout> result;
result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2);
VERIFY_IS_EQUAL(result.dimension(0), 2);
@ -129,13 +130,14 @@ static void test_strides() {
input(12)*kernel(2)));
}
void test_cxx11_tensor_convolution()
{
CALL_SUBTEST(test_evals());
CALL_SUBTEST(test_expr());
CALL_SUBTEST(test_modes());
CALL_SUBTEST(test_strides());
CALL_SUBTEST(test_evals<ColMajor>());
CALL_SUBTEST(test_evals<RowMajor>());
CALL_SUBTEST(test_expr<ColMajor>());
CALL_SUBTEST(test_expr<RowMajor>());
CALL_SUBTEST(test_modes<ColMajor>());
CALL_SUBTEST(test_modes<RowMajor>());
CALL_SUBTEST(test_strides<ColMajor>());
CALL_SUBTEST(test_strides<RowMajor>());
}


@ -117,11 +117,10 @@ void test_cuda_elementwise()
}
}
void test_cuda_reduction()
{
Tensor<float, 4> in1(Eigen::array<int, 4>(72,53,97,113));
Tensor<float, 2> out(Eigen::array<int, 2>(72,97));
Tensor<float, 4> in1(72,53,97,113);
Tensor<float, 2> out(72,97);
in1.setRandom();
std::size_t in1_bytes = in1.size() * sizeof(float);
@ -138,8 +137,8 @@ void test_cuda_reduction()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, Eigen::array<int, 4>(72,53,97,113));
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, Eigen::array<int, 2>(72,97));
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113);
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
@ -156,10 +155,10 @@ void test_cuda_reduction()
for (int k = 0; k < 53; ++k) {
for (int l = 0; l < 113; ++l) {
expected =
std::max<float>(expected, in1(Eigen::array<int, 4>(i, k, j, l)));
std::max<float>(expected, in1(i, k, j, l));
}
}
VERIFY_IS_APPROX(out(Eigen::array<int, 2>(i,j)), expected);
VERIFY_IS_APPROX(out(i,j), expected);
}
}
}
@ -170,7 +169,7 @@ static void test_cuda_contraction()
// with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, the maximum number of concurrently resident
// threads on a 15-SM GK110 GPU
Tensor<float, 4, DataLayout> t_left(Eigen::array<int, 4>(6, 50, 3, 31));
Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31);
Tensor<float, 5, DataLayout> t_right(Eigen::array<int, 5>(3, 31, 7, 20, 1));
Tensor<float, 5, DataLayout> t_result(Eigen::array<int, 5>(6, 50, 7, 20, 1));
@ -196,12 +195,9 @@ static void test_cuda_contraction()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> >
gpu_t_left(d_t_left, Eigen::array<int, 4>(6, 50, 3, 31));
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> >
gpu_t_right(d_t_right, Eigen::array<int, 5>(3, 31, 7, 20, 1));
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> >
gpu_t_result(d_t_result, Eigen::array<int, 5>(6, 50, 7, 20, 1));
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_t_left(d_t_left, 6, 50, 3, 31);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_right(d_t_right, 3, 31, 7, 20, 1);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_result(d_t_result, 6, 50, 7, 20, 1);
typedef Eigen::Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> > MapXf;
MapXf m_left(t_left.data(), 300, 93);
@ -226,11 +222,12 @@ static void test_cuda_contraction()
}
}
template<int DataLayout>
static void test_cuda_convolution_1d()
{
Tensor<float, 4> input(Eigen::array<int, 4>(74,37,11,137));
Tensor<float, 1> kernel(Eigen::array<int, 1>(4));
Tensor<float, 4> out(Eigen::array<int, 4>(74,34,11,137));
Tensor<float, 4, DataLayout> input(74,37,11,137);
Tensor<float, 1, DataLayout> kernel(4);
Tensor<float, 4, DataLayout> out(74,34,11,137);
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();
@ -252,9 +249,9 @@ static void test_cuda_convolution_1d()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_input(d_input, Eigen::array<int, 4>(74,37,11,137));
Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_kernel(d_kernel, Eigen::array<int, 1>(4));
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_out(d_out, Eigen::array<int, 4>(74,34,11,137));
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input, 74,37,11,137);
Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137);
Eigen::array<int, 1> dims(1);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
@ -266,11 +263,9 @@ static void test_cuda_convolution_1d()
for (int j = 0; j < 34; ++j) {
for (int k = 0; k < 11; ++k) {
for (int l = 0; l < 137; ++l) {
const float result = out(Eigen::array<int, 4>(i,j,k,l));
const float expected = input(Eigen::array<int, 4>(i,j+0,k,l)) * kernel(Eigen::array<int, 1>(0)) +
input(Eigen::array<int, 4>(i,j+1,k,l)) * kernel(Eigen::array<int, 1>(1)) +
input(Eigen::array<int, 4>(i,j+2,k,l)) * kernel(Eigen::array<int, 1>(2)) +
input(Eigen::array<int, 4>(i,j+3,k,l)) * kernel(Eigen::array<int, 1>(3));
const float result = out(i,j,k,l);
const float expected = input(i,j+0,k,l) * kernel(0) + input(i,j+1,k,l) * kernel(1) +
input(i,j+2,k,l) * kernel(2) + input(i,j+3,k,l) * kernel(3);
VERIFY_IS_APPROX(result, expected);
}
}
@ -278,12 +273,11 @@ static void test_cuda_convolution_1d()
}
}
static void test_cuda_convolution_2d()
static void test_cuda_convolution_inner_dim_col_major_1d()
{
Tensor<float, 4> input(Eigen::array<int, 4>(74,37,11,137));
Tensor<float, 2> kernel(Eigen::array<int, 2>(3,4));
Tensor<float, 4> out(Eigen::array<int, 4>(74,35,8,137));
Tensor<float, 4, ColMajor> input(74,9,11,7);
Tensor<float, 1, ColMajor> kernel(4);
Tensor<float, 4, ColMajor> out(71,9,11,7);
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();
@ -305,9 +299,110 @@ static void test_cuda_convolution_2d()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_input(d_input, Eigen::array<int, 4>(74,37,11,137));
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_kernel(d_kernel, Eigen::array<int, 2>(3,4));
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_out(d_out, Eigen::array<int, 4>(74,35,8,137));
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_input(d_input,74,9,11,7);
Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4);
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7);
Eigen::array<int, 1> dims(0);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
for (int i = 0; i < 71; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 11; ++k) {
for (int l = 0; l < 7; ++l) {
const float result = out(i,j,k,l);
const float expected = input(i+0,j,k,l) * kernel(0) + input(i+1,j,k,l) * kernel(1) +
input(i+2,j,k,l) * kernel(2) + input(i+3,j,k,l) * kernel(3);
VERIFY_IS_APPROX(result, expected);
}
}
}
}
}
static void test_cuda_convolution_inner_dim_row_major_1d()
{
Tensor<float, 4, RowMajor> input(7,9,11,74);
Tensor<float, 1, RowMajor> kernel(4);
Tensor<float, 4, RowMajor> out(7,9,11,71);
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();
std::size_t input_bytes = input.size() * sizeof(float);
std::size_t kernel_bytes = kernel.size() * sizeof(float);
std::size_t out_bytes = out.size() * sizeof(float);
float* d_input;
float* d_kernel;
float* d_out;
cudaMalloc((void**)(&d_input), input_bytes);
cudaMalloc((void**)(&d_kernel), kernel_bytes);
cudaMalloc((void**)(&d_out), out_bytes);
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
cudaStream_t stream;
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_input(d_input, 7,9,11,74);
Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4);
Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71);
Eigen::array<int, 1> dims(3);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
for (int i = 0; i < 7; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 11; ++k) {
for (int l = 0; l < 71; ++l) {
const float result = out(i,j,k,l);
const float expected = input(i,j,k,l+0) * kernel(0) + input(i,j,k,l+1) * kernel(1) +
input(i,j,k,l+2) * kernel(2) + input(i,j,k,l+3) * kernel(3);
VERIFY_IS_APPROX(result, expected);
}
}
}
}
}
template<int DataLayout>
static void test_cuda_convolution_2d()
{
Tensor<float, 4, DataLayout> input(74,37,11,137);
Tensor<float, 2, DataLayout> kernel(3,4);
Tensor<float, 4, DataLayout> out(74,35,8,137);
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();
std::size_t input_bytes = input.size() * sizeof(float);
std::size_t kernel_bytes = kernel.size() * sizeof(float);
std::size_t out_bytes = out.size() * sizeof(float);
float* d_input;
float* d_kernel;
float* d_out;
cudaMalloc((void**)(&d_input), input_bytes);
cudaMalloc((void**)(&d_kernel), kernel_bytes);
cudaMalloc((void**)(&d_out), out_bytes);
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
cudaStream_t stream;
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input,74,37,11,137);
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137);
Eigen::array<int, 2> dims(1,2);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
@ -319,32 +414,32 @@ static void test_cuda_convolution_2d()
for (int j = 0; j < 35; ++j) {
for (int k = 0; k < 8; ++k) {
for (int l = 0; l < 137; ++l) {
const float result = out(Eigen::array<int, 4>(i,j,k,l));
const float expected = input(Eigen::array<int, 4>(i,j+0,k+0,l)) * kernel(Eigen::array<int, 2>(0,0)) +
input(Eigen::array<int, 4>(i,j+1,k+0,l)) * kernel(Eigen::array<int, 2>(1,0)) +
input(Eigen::array<int, 4>(i,j+2,k+0,l)) * kernel(Eigen::array<int, 2>(2,0)) +
input(Eigen::array<int, 4>(i,j+0,k+1,l)) * kernel(Eigen::array<int, 2>(0,1)) +
input(Eigen::array<int, 4>(i,j+1,k+1,l)) * kernel(Eigen::array<int, 2>(1,1)) +
input(Eigen::array<int, 4>(i,j+2,k+1,l)) * kernel(Eigen::array<int, 2>(2,1)) +
input(Eigen::array<int, 4>(i,j+0,k+2,l)) * kernel(Eigen::array<int, 2>(0,2)) +
input(Eigen::array<int, 4>(i,j+1,k+2,l)) * kernel(Eigen::array<int, 2>(1,2)) +
input(Eigen::array<int, 4>(i,j+2,k+2,l)) * kernel(Eigen::array<int, 2>(2,2)) +
input(Eigen::array<int, 4>(i,j+0,k+3,l)) * kernel(Eigen::array<int, 2>(0,3)) +
input(Eigen::array<int, 4>(i,j+1,k+3,l)) * kernel(Eigen::array<int, 2>(1,3)) +
input(Eigen::array<int, 4>(i,j+2,k+3,l)) * kernel(Eigen::array<int, 2>(2,3));
VERIFY_IS_APPROX(result, expected);
const float result = out(i,j,k,l);
const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
input(i,j+1,k+0,l) * kernel(1,0) +
input(i,j+2,k+0,l) * kernel(2,0) +
input(i,j+0,k+1,l) * kernel(0,1) +
input(i,j+1,k+1,l) * kernel(1,1) +
input(i,j+2,k+1,l) * kernel(2,1) +
input(i,j+0,k+2,l) * kernel(0,2) +
input(i,j+1,k+2,l) * kernel(1,2) +
input(i,j+2,k+2,l) * kernel(2,2) +
input(i,j+0,k+3,l) * kernel(0,3) +
input(i,j+1,k+3,l) * kernel(1,3) +
input(i,j+2,k+3,l) * kernel(2,3);
VERIFY_IS_APPROX(result, expected);
}
}
}
}
}
template<int DataLayout>
static void test_cuda_convolution_3d()
{
Tensor<float, 5> input(Eigen::array<int, 5>(74,37,11,137,17));
Tensor<float, 3> kernel(Eigen::array<int, 3>(3,4,2));
Tensor<float, 5> out(Eigen::array<int, 5>(74,35,8,136,17));
Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
Tensor<float, 3, DataLayout> kernel(3,4,2);
Tensor<float, 5, DataLayout> out(Eigen::array<int, 5>(74,35,8,136,17));
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();
@ -366,9 +461,9 @@ static void test_cuda_convolution_3d()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 5> > gpu_input(d_input, Eigen::array<int, 5>(74,37,11,137,17));
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_kernel(d_kernel, Eigen::array<int, 3>(3,4,2));
Eigen::TensorMap<Eigen::Tensor<float, 5> > gpu_out(d_out, Eigen::array<int, 5>(74,35,8,136,17));
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_input(d_input,74,37,11,137,17);
Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17);
Eigen::array<int, 3> dims(1,2,3);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
@ -381,31 +476,31 @@ static void test_cuda_convolution_3d()
for (int k = 0; k < 8; ++k) {
for (int l = 0; l < 136; ++l) {
for (int m = 0; m < 17; ++m) {
const float result = out(Eigen::array<int, 5>(i,j,k,l,m));
const float expected = input(Eigen::array<int, 5>(i,j+0,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(0,0,0)) +
input(Eigen::array<int, 5>(i,j+1,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(1,0,0)) +
input(Eigen::array<int, 5>(i,j+2,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(2,0,0)) +
input(Eigen::array<int, 5>(i,j+0,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(0,1,0)) +
input(Eigen::array<int, 5>(i,j+1,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(1,1,0)) +
input(Eigen::array<int, 5>(i,j+2,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(2,1,0)) +
input(Eigen::array<int, 5>(i,j+0,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(0,2,0)) +
input(Eigen::array<int, 5>(i,j+1,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(1,2,0)) +
input(Eigen::array<int, 5>(i,j+2,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(2,2,0)) +
input(Eigen::array<int, 5>(i,j+0,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(0,3,0)) +
input(Eigen::array<int, 5>(i,j+1,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(1,3,0)) +
input(Eigen::array<int, 5>(i,j+2,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(2,3,0)) +
input(Eigen::array<int, 5>(i,j+0,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(0,0,1)) +
input(Eigen::array<int, 5>(i,j+1,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(1,0,1)) +
input(Eigen::array<int, 5>(i,j+2,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(2,0,1)) +
input(Eigen::array<int, 5>(i,j+0,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(0,1,1)) +
input(Eigen::array<int, 5>(i,j+1,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(1,1,1)) +
input(Eigen::array<int, 5>(i,j+2,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(2,1,1)) +
input(Eigen::array<int, 5>(i,j+0,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(0,2,1)) +
input(Eigen::array<int, 5>(i,j+1,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(1,2,1)) +
input(Eigen::array<int, 5>(i,j+2,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(2,2,1)) +
input(Eigen::array<int, 5>(i,j+0,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(0,3,1)) +
input(Eigen::array<int, 5>(i,j+1,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(1,3,1)) +
input(Eigen::array<int, 5>(i,j+2,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(2,3,1));
const float result = out(i,j,k,l,m);
const float expected = input(i,j+0,k+0,l+0,m) * kernel(0,0,0) +
input(i,j+1,k+0,l+0,m) * kernel(1,0,0) +
input(i,j+2,k+0,l+0,m) * kernel(2,0,0) +
input(i,j+0,k+1,l+0,m) * kernel(0,1,0) +
input(i,j+1,k+1,l+0,m) * kernel(1,1,0) +
input(i,j+2,k+1,l+0,m) * kernel(2,1,0) +
input(i,j+0,k+2,l+0,m) * kernel(0,2,0) +
input(i,j+1,k+2,l+0,m) * kernel(1,2,0) +
input(i,j+2,k+2,l+0,m) * kernel(2,2,0) +
input(i,j+0,k+3,l+0,m) * kernel(0,3,0) +
input(i,j+1,k+3,l+0,m) * kernel(1,3,0) +
input(i,j+2,k+3,l+0,m) * kernel(2,3,0) +
input(i,j+0,k+0,l+1,m) * kernel(0,0,1) +
input(i,j+1,k+0,l+1,m) * kernel(1,0,1) +
input(i,j+2,k+0,l+1,m) * kernel(2,0,1) +
input(i,j+0,k+1,l+1,m) * kernel(0,1,1) +
input(i,j+1,k+1,l+1,m) * kernel(1,1,1) +
input(i,j+2,k+1,l+1,m) * kernel(2,1,1) +
input(i,j+0,k+2,l+1,m) * kernel(0,2,1) +
input(i,j+1,k+2,l+1,m) * kernel(1,2,1) +
input(i,j+2,k+2,l+1,m) * kernel(2,2,1) +
input(i,j+0,k+3,l+1,m) * kernel(0,3,1) +
input(i,j+1,k+3,l+1,m) * kernel(1,3,1) +
input(i,j+2,k+3,l+1,m) * kernel(2,3,1);
VERIFY_IS_APPROX(result, expected);
}
}
@ -414,91 +509,6 @@ static void test_cuda_convolution_3d()
}
}
static float* CudaCopyFloat(float* data, int size) {
const int nbytes = size * sizeof(float);
float* result = NULL;
if (cudaMalloc((void**)(&result), nbytes) != cudaSuccess) {
return NULL;
} else {
if (data != NULL) {
cudaMemcpy(result, data, nbytes, cudaMemcpyHostToDevice);
}
return result;
}
}
static void test_cuda_constant_broadcast()
{
cudaStream_t stream;
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Tensor<float, 1> t1(10);
for (int i = 0; i < 10; ++i) {
t1(i) = 10.0f * i;
}
float* t1_cuda = CudaCopyFloat(t1.data(), t1.size());
Eigen::TensorMap<Eigen::Tensor<float, 1> > t1_gpu(t1_cuda, 10);
Tensor<float, 1> t2(1);
t2 = t2.constant(20.0f);
float* t2_cuda = CudaCopyFloat(t2.data(), t2.size());
Eigen::TensorMap<Eigen::TensorFixedSize<float, Sizes<1> > > t2_gpu(t2_cuda, 1);
float* t3_cuda = CudaCopyFloat(NULL, 10);
Eigen::TensorMap<Eigen::Tensor<float, 1> > t3_gpu(t3_cuda, 10);
t3_gpu.device(gpu_device) =
t1_gpu + t2_gpu.broadcast(Eigen::array<int, 1>(10));
Eigen::Tensor<float, 1> t3(10);
cudaMemcpy(t3.data(), t3_gpu.data(), 10 * sizeof(float),
cudaMemcpyDeviceToHost);
for (int i = 0; i < 10; ++i) {
VERIFY_IS_APPROX(t3(i), t1(i) + t2(0));
}
}
void test_cuda_cast()
{
Tensor<double, 3> in(Eigen::array<int, 3>(72,53,97));
Tensor<float, 3> out(Eigen::array<int, 3>(72,53,97));
in.setRandom();
std::size_t in_bytes = in.size() * sizeof(double);
std::size_t out_bytes = out.size() * sizeof(float);
double* d_in;
float* d_out;
cudaMalloc((void**)(&d_in), in_bytes);
cudaMalloc((void**)(&d_out), out_bytes);
cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice);
cudaStream_t stream;
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<double, 3> > gpu_in(d_in, Eigen::array<int, 3>(72,53,97));
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(72,53,97));
gpu_out.device(gpu_device) = gpu_in.template cast<float>();
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
for (int i = 0; i < 72; ++i) {
for (int j = 0; j < 53; ++j) {
for (int k = 0; k < 97; ++k) {
VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), static_cast<float>(in(Eigen::array<int, 3>(i,j,k))));
}
}
}
}
void test_cxx11_tensor_cuda()
{
CALL_SUBTEST(test_cuda_elementwise_small());
@ -506,9 +516,12 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST(test_cuda_reduction());
CALL_SUBTEST(test_cuda_contraction<ColMajor>());
CALL_SUBTEST(test_cuda_contraction<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_1d());
CALL_SUBTEST(test_cuda_convolution_2d());
CALL_SUBTEST(test_cuda_convolution_3d());
CALL_SUBTEST(test_cuda_constant_broadcast());
CALL_SUBTEST(test_cuda_cast());
CALL_SUBTEST(test_cuda_convolution_1d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_1d<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d());
CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d());
CALL_SUBTEST(test_cuda_convolution_2d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>());
}
