Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-11 19:29:02 +08:00)

Commit 0250f4a9f2: Merged default into unary-array-cwise-functors
@@ -168,6 +168,11 @@ if(NOT MSVC)
else()
ei_add_cxx_compiler_flag("-ansi")
endif()

if(ANDROID_NDK)
ei_add_cxx_compiler_flag("-pie")
ei_add_cxx_compiler_flag("-fPIE")
endif()

set(CMAKE_REQUIRED_FLAGS "")

@@ -208,7 +213,7 @@ if(NOT MSVC)
endif()

option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF)
if(EIGEN_TEST_FMA)
if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
message(STATUS "Enabling FMA in tests/examples")
endif()
@@ -227,7 +232,12 @@ if(NOT MSVC)

option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mfloat-abi=softfp")
if(EIGEN_TEST_FMA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
message(STATUS "Enabling NEON in tests/examples")
endif()
@@ -308,6 +308,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/NEON/PacketMath.h"
#include "src/Core/arch/NEON/MathFunctions.h"
#include "src/Core/arch/NEON/Complex.h"
#include "src/Core/arch/NEON/BlockingSizesLookupTables.h"
#endif

#if defined EIGEN_VECTORIZE_CUDA
@@ -381,6 +382,7 @@ using std::ptrdiff_t;
#include "src/Core/Inverse.h"
#include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h"
#include "src/Core/products/LookupBlockingSizesTable.h"
#include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/Parallelizer.h"
#include "src/Core/ProductEvaluators.h"
@@ -17,7 +17,7 @@
*
* These iterative solvers are associated with some preconditioners:
* - IdentityPreconditioner - not really useful
* - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices.
* - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices.
* - IncompleteLUT - incomplete LU factorization with dual thresholding
*
* Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
@@ -226,6 +226,11 @@ template<typename _MatrixType, int _UpLo> class LDLT
#endif

protected:

static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}

/** \internal
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
@@ -424,6 +429,8 @@ template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();

eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
@@ -170,6 +170,12 @@ template<typename _MatrixType, int _UpLo> class LLT
#endif

protected:

static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}

/** \internal
* Used to compute and store L
* The strict upper part is not used and even not initialized.
@@ -377,6 +383,8 @@ template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
template<typename MatrixType, int _UpLo>
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const MatrixType& a)
{
check_template_parameters();

eigen_assert(a.rows()==a.cols());
const Index size = a.rows();
m_matrix.resize(size, size);
@@ -647,11 +647,15 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,

// TODO: should check for smaller packet types once we can handle multi-sized packet types
AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),

KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ),
Flags0 = evaluator<PlainObjectType>::Flags,
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
@@ -717,7 +721,10 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,

MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
// TODO: should check for smaller packet types once we can handle multi-sized packet types
AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),

MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
@@ -825,12 +832,16 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;

EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
// TODO: should check for smaller packet types once we can handle multi-sized packet types
const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar);
EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes)
// FIXME this should be an internal assertion
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned");
}
};
@@ -300,9 +300,10 @@ template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant
(const Scalar& val, const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
if(!internal::isApprox(this->coeff(i, j), val, prec))
if(!internal::isApprox(self.coeff(i, j), val, prec))
return false;
return true;
}
@@ -484,9 +485,10 @@ DenseBase<Derived>::Zero()
template<typename Derived>
bool DenseBase<Derived>::isZero(const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
for(Index i = 0; i < rows(); ++i)
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<Scalar>(1), prec))
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
return false;
return true;
}
@@ -719,18 +721,19 @@ template<typename Derived>
bool MatrixBase<Derived>::isIdentity
(const RealScalar& prec) const
{
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j)
{
for(Index i = 0; i < rows(); ++i)
{
if(i == j)
{
if(!internal::isApprox(this->coeff(i, j), static_cast<Scalar>(1), prec))
if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
return false;
}
else
{
if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast<RealScalar>(1), prec))
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
return false;
}
}
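The switch to internal::nested_eval above means these predicates evaluate a compound expression once into `self` instead of re-evaluating `this->coeff(i,j)` for every element. A minimal usage sketch of the affected predicates (plain Eigen code, not part of the patch):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(50, 50);
  Eigen::MatrixXd Z = Eigen::MatrixXd::Zero(50, 50);
  // The product expression is evaluated into a temporary once, then tested element-wise.
  std::cout << std::boolalpha << (A * Z).isZero() << "\n";   // true
  std::cout << A.isApproxToConstant(0.0) << "\n";            // almost surely false for a random A
  return 0;
}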
@@ -34,14 +34,35 @@ void check_static_allocation_size()
#endif
}

template<typename T, int Size, typename Packet = typename packet_traits<T>::type,
bool Match = bool((Size%unpacket_traits<Packet>::size)==0),
bool TryHalf = bool(int(unpacket_traits<Packet>::size) > 1)
&& bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) >
struct compute_default_alignment
{
enum { value = 0 };
};

template<typename T, int Size, typename Packet, bool TryHalf>
struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match
{
enum { value = sizeof(T) * unpacket_traits<Packet>::size };
};

template<typename T, int Size, typename Packet>
struct compute_default_alignment<T, Size, Packet, false, true> // Try-half
{
// current packet too large, try with an half-packet
enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value };
};

/** \internal
* Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
* to 16 bytes boundary if the total size is a multiple of 16 bytes.
*/
template <typename T, int Size, int MatrixOrArrayOptions,
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
: (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
: 0 >
: compute_default_alignment<T,Size>::value >
struct plain_array
{
T array[Size];
@@ -81,14 +102,71 @@ struct plain_array
#endif

template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
struct plain_array<T, Size, MatrixOrArrayOptions, 8>
{
EIGEN_USER_ALIGN_DEFAULT T array[Size];
EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];

EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
check_static_allocation_size<T,Size>();
}

EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};

template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
{
EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];

EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
check_static_allocation_size<T,Size>();
}

EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};

template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 32>
{
EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];

EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
check_static_allocation_size<T,Size>();
}

EIGEN_DEVICE_FUNC
plain_array(constructor_without_unaligned_array_assert)
{
check_static_allocation_size<T,Size>();
}
};

template <typename T, int Size, int MatrixOrArrayOptions>
struct plain_array<T, Size, MatrixOrArrayOptions, 64>
{
EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];

EIGEN_DEVICE_FUNC
plain_array()
{
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
check_static_allocation_size<T,Size>();
}
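The compute_default_alignment recursion above picks the largest packet whose element count divides the array size, falling back to half-sized packets and finally to no alignment at all. A standalone toy model of that selection (ToyPacket and toy_unpacket_traits are illustrative stand-ins, not Eigen types; packet widths of 8 and 4 floats are assumed):

#include <iostream>

// Toy packet hierarchy: a "full" packet of 8 floats whose half is a packet of
// 4 floats, whose half is itself (the recursion terminator).
template <int N> struct ToyPacket {};
template <typename P> struct toy_unpacket_traits;
template <> struct toy_unpacket_traits<ToyPacket<8> > { static const int size = 8; typedef ToyPacket<4> half; };
template <> struct toy_unpacket_traits<ToyPacket<4> > { static const int size = 4; typedef ToyPacket<4> half; };

template <typename T, int Size, typename Packet = ToyPacket<8>,
          bool Match   = (Size % toy_unpacket_traits<Packet>::size) == 0,
          bool TryHalf = (toy_unpacket_traits<Packet>::size
                          > toy_unpacket_traits<typename toy_unpacket_traits<Packet>::half>::size)>
struct toy_default_alignment { enum { value = 0 }; };            // nothing matched: no alignment

template <typename T, int Size, typename Packet, bool TryHalf>
struct toy_default_alignment<T, Size, Packet, true, TryHalf>     // current packet divides Size
{ enum { value = sizeof(T) * toy_unpacket_traits<Packet>::size }; };

template <typename T, int Size, typename Packet>
struct toy_default_alignment<T, Size, Packet, false, true>       // retry with the half packet
{ enum { value = toy_default_alignment<T, Size, typename toy_unpacket_traits<Packet>::half>::value }; };

int main() {
  std::cout << toy_default_alignment<float, 16>::value << "\n";  // 32: multiple of the 8-float packet
  std::cout << toy_default_alignment<float, 4>::value  << "\n";  // 16: falls back to the 4-float packet
  std::cout << toy_default_alignment<float, 3>::value  << "\n";  // 0: no packet size divides 3
}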
@@ -224,13 +224,13 @@ bool MatrixBase<Derived>::isOrthogonal
template<typename Derived>
bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
{
typename Derived::Nested nested(derived());
typename internal::nested_eval<Derived,1>::type self(derived());
for(Index i = 0; i < cols(); ++i)
{
if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
return false;
for(Index j = 0; j < i; ++j)
if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
return false;
}
return true;
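For context, the predicate touched here is the usual MatrixBase::isUnitary() check (unit-norm, pairwise-orthogonal columns). A plain usage example, not taken from the patch:

#include <Eigen/Dense>
#include <cmath>
#include <iostream>

int main() {
  double a = 0.3;
  Eigen::Matrix2d R;
  R << std::cos(a), -std::sin(a),
       std::sin(a),  std::cos(a);
  std::cout << std::boolalpha << R.isUnitary() << "\n";  // true: a rotation matrix
  std::cout << (2.0 * R).isUnitary() << "\n";            // false: columns are no longer unit length
}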
@@ -328,6 +328,7 @@ struct hypot_impl
p = _y;
qp = _x / p;
}
if(p==RealScalar(0)) return RealScalar(0);
return p * sqrt(RealScalar(1) + qp*qp);
}
};
@@ -560,48 +561,48 @@ struct random_default_impl<Scalar, false, false>
};

enum {
floor_log2_terminate,
floor_log2_move_up,
floor_log2_move_down,
floor_log2_bogus
meta_floor_log2_terminate,
meta_floor_log2_move_up,
meta_floor_log2_move_down,
meta_floor_log2_bogus
};

template<unsigned int n, int lower, int upper> struct floor_log2_selector
template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
{
enum { middle = (lower + upper) / 2,
value = (upper <= lower + 1) ? int(floor_log2_terminate)
: (n < (1 << middle)) ? int(floor_log2_move_down)
: (n==0) ? int(floor_log2_bogus)
: int(floor_log2_move_up)
value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
: (n < (1 << middle)) ? int(meta_floor_log2_move_down)
: (n==0) ? int(meta_floor_log2_bogus)
: int(meta_floor_log2_move_up)
};
};

template<unsigned int n,
int lower = 0,
int upper = sizeof(unsigned int) * CHAR_BIT - 1,
int selector = floor_log2_selector<n, lower, upper>::value>
struct floor_log2 {};
int selector = meta_floor_log2_selector<n, lower, upper>::value>
struct meta_floor_log2 {};

template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_down>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
{
enum { value = floor_log2<n, lower, floor_log2_selector<n, lower, upper>::middle>::value };
enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
};

template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_move_up>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
{
enum { value = floor_log2<n, floor_log2_selector<n, lower, upper>::middle, upper>::value };
enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
};

template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_terminate>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
{
enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
};

template<unsigned int n, int lower, int upper>
struct floor_log2<n, lower, upper, floor_log2_bogus>
struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
{
// no value, error at compile time
};
@@ -609,11 +610,24 @@ struct floor_log2<n, lower, upper, floor_log2_bogus>
template<typename Scalar>
struct random_default_impl<Scalar, false, true>
{
typedef typename NumTraits<Scalar>::NonInteger NonInteger;

static inline Scalar run(const Scalar& x, const Scalar& y)
{
return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1)));
{
using std::max;
using std::min;
typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
if(y<x)
return x;
std::size_t range = ScalarX(y)-ScalarX(x);
std::size_t offset = 0;
// rejection sampling
std::size_t divisor = (range+RAND_MAX-1)/(range+1);
std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);

do {
offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
} while (offset > range);

return Scalar(ScalarX(x) + offset);
}

static inline Scalar run()
@@ -621,7 +635,7 @@ struct random_default_impl<Scalar, false, true>
#ifdef EIGEN_MAKING_DOCS
return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
#else
enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value,
enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
scalar_bits = sizeof(Scalar) * CHAR_BIT,
shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
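The new integer path above replaces the old scale-and-truncate formula with rejection sampling, so every value of the closed range [x,y] stays (approximately) equally likely and intermediate overflow is avoided. A standalone sketch of the same divisor/multiplier logic (random_in_range is an illustrative helper, not Eigen code; it assumes y >= x):

#include <cstdlib>
#include <iostream>

long random_in_range(long x, long y) {
  const std::size_t range      = std::size_t(y - x);
  const std::size_t divisor    = (range + RAND_MAX - 1) / (range + 1);
  const std::size_t multiplier = (range + RAND_MAX - 1) / std::size_t(RAND_MAX);
  std::size_t offset;
  do {
    offset = (std::size_t(std::rand()) * multiplier) / divisor;
  } while (offset > range);                      // reject the occasional overshoot
  return x + long(offset);
}

int main() {
  int histogram[5] = {0, 0, 0, 0, 0};
  for (int i = 0; i < 100000; ++i)
    ++histogram[random_in_range(-2, 2) + 2];     // sample from {-2,...,2}
  for (int v = 0; v < 5; ++v)
    std::cout << v - 2 << ": " << histogram[v] << "\n";  // roughly uniform counts
}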
@@ -409,7 +409,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,

LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
CoeffReadCost = (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
: (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits<Scalar>::AddCost==Dynamic || NumTraits<Scalar>::MulCost==Dynamic) ? Dynamic
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,

@@ -484,7 +485,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
{
PacketScalar res;
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
Unroll ? InnerSize-1 : Dynamic,
Unroll ? InnerSize : Dynamic,
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;

PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
@@ -527,7 +528,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
}
};

@@ -537,12 +538,12 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
}
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
@@ -551,7 +552,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
@@ -559,14 +560,31 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
}
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
res = pset1<Packet>(0);
}
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{
res = pset1<Packet>(0);
}
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
for(Index i = 1; i < innerDim; ++i)
res = pset1<Packet>(0);
for(Index i = 0; i < innerDim; ++i)
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
}
};
@@ -576,9 +594,8 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
for(Index i = 1; i < innerDim; ++i)
res = pset1<Packet>(0);
for(Index i = 0; i < innerDim; ++i)
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
}
};
@@ -678,8 +695,7 @@ public:
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
//(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0)
};

diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
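The InnerSize==0 cost above and the zero-initialised packet loops are aimed at degenerate products whose inner dimension is empty: such a product is now well defined and simply yields zeros. A plain usage sketch (not from the patch):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A(2, 0);            // 2x0: no columns
  Eigen::MatrixXd B(0, 3);            // 0x3: no rows
  Eigen::MatrixXd C = A * B;          // 2x3 result, all zeros, no uninitialized reads
  std::cout << C << "\n";
}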
@@ -200,17 +200,82 @@ DenseBase<Derived>::reverse() const
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional features:
* the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap)
* - this API enables reverse operations without the need for a temporary
* - it allows future optimizations (cache friendliness, etc.)
*
* \sa reverse() */
* \sa VectorwiseOp::reverseInPlace(), reverse() */
template<typename Derived>
inline void DenseBase<Derived>::reverseInPlace()
{
derived() = derived().reverse().eval();
if(cols()>rows())
{
Index half = cols()/2;
leftCols(half).swap(rightCols(half).reverse());
if((cols()%2)==1)
{
Index half2 = rows()/2;
col(half).head(half2).swap(col(half).tail(half2).reverse());
}
}
else
{
Index half = rows()/2;
topRows(half).swap(bottomRows(half).reverse());
if((rows()%2)==1)
{
Index half2 = cols()/2;
row(half).head(half2).swap(row(half).tail(half2).reverse());
}
}
}

namespace internal {

template<int Direction>
struct vectorwise_reverse_inplace_impl;

template<>
struct vectorwise_reverse_inplace_impl<Vertical>
{
template<typename ExpressionType>
static void run(ExpressionType &xpr)
{
Index half = xpr.rows()/2;
xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
}
};

template<>
struct vectorwise_reverse_inplace_impl<Horizontal>
{
template<typename ExpressionType>
static void run(ExpressionType &xpr)
{
Index half = xpr.cols()/2;
xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
}
};

} // end namespace internal

/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
*
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - this API enables reverse operations without the need for a temporary
*
* \sa DenseBase::reverseInPlace(), reverse() */
template<typename ExpressionType, int Direction>
void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
{
internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
}

} // end namespace Eigen
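Usage sketch for the two in-place reversals defined above (assumes a build that contains this commit, since VectorwiseOp::reverseInPlace() is introduced here):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXi m(3, 4);
  m << 1,  2,  3,  4,
       5,  6,  7,  8,
       9, 10, 11, 12;

  Eigen::MatrixXi a = m;
  a.reverseInPlace();                 // reverses rows and columns via block swaps, no full temporary
  std::cout << a << "\n\n";

  Eigen::MatrixXi b = m;
  b.colwise().reverseInPlace();       // reverses each column in place (new VectorwiseOp overload)
  std::cout << b << "\n";
}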
@@ -38,13 +38,17 @@ public:
template<int StoreMode, int LoadMode>
void assignPacket(Index row, Index col)
{
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(row,col), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(row,col));
PacketScalar tmp = m_src.template packet<LoadMode>(row,col);
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode>(row,col));
m_dst.template writePacket<StoreMode>(row,col,tmp);
}

template<int StoreMode, int LoadMode>
void assignPacket(Index index)
{
m_functor.template swapPacket<StoreMode,LoadMode,PacketScalar>(&m_dst.coeffRef(index), &const_cast<SrcEvaluatorTypeT&>(m_src).coeffRef(index));
PacketScalar tmp = m_src.template packet<LoadMode>(index);
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode>(index));
m_dst.template writePacket<StoreMode>(index,tmp);
}

// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
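The rewritten assignPacket above is what runs when two blocks are swapped through the vectorized path; exercising it only requires an ordinary expression-level swap (plain Eigen usage, not part of the patch):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXf m = Eigen::MatrixXf::Constant(4, 4, 1.f);
  Eigen::MatrixXf n = Eigen::MatrixXf::Constant(4, 4, 2.f);
  m.topRows(2).swap(n.bottomRows(2)); // packet-wise swap of two same-sized blocks
  std::cout << m << "\n\n" << n << "\n";
}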
@@ -562,6 +562,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
void normalize() {
m_matrix = this->normalized();
}

inline void reverseInPlace();

/////////// Geometry module ///////////
@@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(cons
}
#endif

template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) {
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
}

template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) {
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
return make_double2(from[0*stride], from[1*stride]);
}

template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) {
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
to[stride*2] = from.z;
to[stride*3] = from.w;
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) {
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
to[stride*0] = from.x;
to[stride*1] = from.y;
}
@@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a)
}

template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w));
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
}
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
return make_double2(abs(a.x), abs(a.y));
return make_double2(fabs(a.x), fabs(a.y));
}

template<> EIGEN_DEVICE_FUNC inline void
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
@@ -279,7 +279,7 @@ ptranspose(PacketBlock<float4,4>& kernel) {
kernel.packet[3].z = tmp;
}

template<> EIGEN_DEVICE_FUNC inline void
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<double2,2>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
Eigen/src/Core/arch/NEON/BlockingSizesLookupTables.h (new file, 110 lines)
@@ -0,0 +1,110 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
#define EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H

namespace Eigen {
namespace internal {

/* The following lookup table was generated from measurements on a Nexus 5,
 * which has a Qualcomm Krait 400 CPU. This is very representative of current
 * 32bit (ARMv7) Android devices. On the other hand, I don't know how
 * representative that is outside of these conditions. Accordingly,
 * let's only use this lookup table on ARM 32bit on Android for now.
 *
 * Measurements were single-threaded, with Scalar=float, compiled with
 * -mfpu=neon-vfpv4, so the pmadd instruction used was VFMA.F32.
 *
 * The device was cooled, allowing it to run a the max clock speed throughout.
 * This may not be representative of real-world thermal conditions.
 *
 * The benchmark attempted to flush caches to test cold-cache performance.
 */
#if EIGEN_ARCH_ARM && EIGEN_OS_ANDROID
template<>
struct BlockingSizesLookupTable<float, float> {
static const size_t BaseSize = 16;
static const size_t NumSizes = 8;
static const unsigned short* Data() {
static const unsigned short data[512] = {
0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x447, 0x447,
0x454, 0x455, 0x456, 0x457, 0x458, 0x459, 0x45a, 0x456,
0x464, 0x465, 0x466, 0x467, 0x468, 0x469, 0x46a, 0x467,
0x474, 0x475, 0x476, 0x467, 0x478, 0x479, 0x476, 0x478,
0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x476, 0x476,
0x474, 0x475, 0x476, 0x477, 0x478, 0x479, 0x496, 0x488,
0x474, 0x475, 0x476, 0x4a6, 0x496, 0x496, 0x495, 0x4a6,
0x474, 0x475, 0x466, 0x4a6, 0x497, 0x4a5, 0x496, 0x4a5,
0x544, 0x545, 0x546, 0x547, 0x548, 0x549, 0x54a, 0x54b,
0x554, 0x555, 0x556, 0x557, 0x558, 0x559, 0x55a, 0x55b,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x56b,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x576,
0x564, 0x565, 0x566, 0x567, 0x568, 0x569, 0x56a, 0x587,
0x564, 0x565, 0x566, 0x567, 0x596, 0x596, 0x596, 0x597,
0x574, 0x565, 0x566, 0x596, 0x596, 0x5a6, 0x5a6, 0x5a6,
0x564, 0x565, 0x5a6, 0x596, 0x5a6, 0x5a6, 0x5a6, 0x5a6,
0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0x64a, 0x64b,
0x644, 0x655, 0x656, 0x657, 0x658, 0x659, 0x65a, 0x65b,
0x664, 0x665, 0x666, 0x667, 0x668, 0x669, 0x65a, 0x667,
0x654, 0x665, 0x676, 0x677, 0x678, 0x679, 0x67a, 0x675,
0x684, 0x675, 0x686, 0x687, 0x688, 0x688, 0x687, 0x686,
0x664, 0x685, 0x666, 0x677, 0x697, 0x696, 0x697, 0x697,
0x664, 0x665, 0x696, 0x696, 0x685, 0x6a6, 0x696, 0x696,
0x664, 0x675, 0x686, 0x696, 0x6a6, 0x696, 0x696, 0x696,
0x744, 0x745, 0x746, 0x747, 0x748, 0x749, 0x74a, 0x747,
0x754, 0x755, 0x756, 0x757, 0x758, 0x759, 0x75a, 0x757,
0x764, 0x765, 0x756, 0x767, 0x768, 0x759, 0x75a, 0x766,
0x744, 0x755, 0x766, 0x777, 0x768, 0x759, 0x778, 0x777,
0x744, 0x745, 0x766, 0x777, 0x788, 0x786, 0x786, 0x788,
0x754, 0x755, 0x766, 0x787, 0x796, 0x796, 0x787, 0x796,
0x684, 0x695, 0x696, 0x6a6, 0x795, 0x786, 0x795, 0x796,
0x684, 0x695, 0x696, 0x795, 0x786, 0x796, 0x795, 0x796,
0x844, 0x845, 0x846, 0x847, 0x848, 0x849, 0x848, 0x848,
0x844, 0x855, 0x846, 0x847, 0x848, 0x849, 0x855, 0x857,
0x844, 0x845, 0x846, 0x857, 0x848, 0x859, 0x866, 0x865,
0x844, 0x855, 0x846, 0x847, 0x878, 0x859, 0x877, 0x877,
0x844, 0x855, 0x846, 0x867, 0x886, 0x887, 0x885, 0x886,
0x784, 0x785, 0x786, 0x877, 0x897, 0x885, 0x896, 0x896,
0x684, 0x695, 0x686, 0x886, 0x885, 0x885, 0x886, 0x896,
0x694, 0x6a5, 0x6a6, 0x885, 0x885, 0x886, 0x896, 0x896,
0x944, 0x945, 0x946, 0x947, 0x948, 0x847, 0x847, 0x848,
0x954, 0x855, 0x856, 0x947, 0x858, 0x857, 0x858, 0x858,
0x944, 0x945, 0x946, 0x867, 0x948, 0x866, 0x867, 0x867,
0x944, 0x975, 0x976, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x785, 0x886, 0x887, 0x886, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x796, 0x887, 0x897, 0x896, 0x896,
0x684, 0x695, 0x6a6, 0x886, 0x886, 0x896, 0x896, 0x896,
0x6a4, 0x6a5, 0x696, 0x896, 0x886, 0x896, 0x896, 0x896,
0xa44, 0xa45, 0xa46, 0xa47, 0x847, 0x848, 0x847, 0x848,
0xa44, 0xa45, 0x856, 0x857, 0x857, 0x857, 0x857, 0x857,
0xa44, 0xa65, 0x866, 0x867, 0x867, 0x867, 0x867, 0x867,
0x774, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x785, 0x886, 0x887, 0x887, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x787, 0x887, 0x896, 0x897, 0x897,
0x684, 0x6a5, 0x696, 0x886, 0x886, 0x896, 0x896, 0x896,
0x684, 0x6a5, 0x6a5, 0x886, 0x886, 0x896, 0x896, 0x896,
0xb44, 0x845, 0x846, 0x847, 0x847, 0x945, 0x846, 0x946,
0xb54, 0x855, 0x856, 0x857, 0x857, 0x856, 0x857, 0x856,
0x864, 0x865, 0x866, 0x867, 0x867, 0x866, 0x866, 0x867,
0x864, 0x875, 0x876, 0x877, 0x877, 0x877, 0x877, 0x877,
0x784, 0x885, 0x886, 0x787, 0x887, 0x887, 0x887, 0x887,
0x784, 0x785, 0x786, 0x796, 0x886, 0x897, 0x897, 0x897,
0x684, 0x695, 0x696, 0x886, 0x896, 0x896, 0x896, 0x896,
0x684, 0x685, 0x696, 0xb57, 0x896, 0x896, 0x896, 0x896
};
return data;
}
};
#endif

}
}

#endif // EIGEN_NEON_BLOCKING_SIZES_LOOKUP_TABLES_H
|
||||
swap(a,const_cast<Scalar&>(b));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int LhsAlignment, int RhsAlignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const
|
||||
{
|
||||
Packet tmp = internal::ploadt<Packet,RhsAlignment>(b);
|
||||
internal::pstoret<Scalar,Packet,RhsAlignment>(b, internal::ploadt<Packet,LhsAlignment>(a));
|
||||
internal::pstoret<Scalar,Packet,LhsAlignment>(a, tmp);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<swap_assign_op<Scalar> > {
|
||||
|
@@ -25,21 +25,31 @@ inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff
return a<=0 ? b : a;
}

#if EIGEN_ARCH_i386_OR_x86_64
const std::ptrdiff_t defaultL1CacheSize = 32*1024;
const std::ptrdiff_t defaultL2CacheSize = 256*1024;
const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
#else
const std::ptrdiff_t defaultL1CacheSize = 16*1024;
const std::ptrdiff_t defaultL2CacheSize = 512*1024;
const std::ptrdiff_t defaultL3CacheSize = 512*1024;
#endif

/** \internal */
inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
{
static bool m_cache_sizes_initialized = false;
static std::ptrdiff_t m_l1CacheSize = 32*1024;
static std::ptrdiff_t m_l2CacheSize = 256*1024;
static std::ptrdiff_t m_l3CacheSize = 2*1024*1024;
static std::ptrdiff_t m_l1CacheSize = 0;
static std::ptrdiff_t m_l2CacheSize = 0;
static std::ptrdiff_t m_l3CacheSize = 0;

if(!m_cache_sizes_initialized)
{
int l1CacheSize, l2CacheSize, l3CacheSize;
queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, 8*1024);
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, 256*1024);
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, 8*1024*1024);
m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
m_cache_sizes_initialized = true;
}

@@ -64,45 +74,23 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
}
}

/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depends on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
/* Helper for computeProductBlockingSizes.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms. The blocking sizes depends on various
* parameters:
* - the L1 and L2 cache sizes,
* - the register level blocking sizes defined by gebp_traits,
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */

template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;

#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
EIGEN_UNUSED_VARIABLE(num_threads);
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
if (k > kr) k -= k % kr;
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
if (m > mr) m -= m % mr;
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
if (n > nr) n -= n % nr;
return;
}
#endif

// Explanations:
// Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
// kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
@@ -261,16 +249,69 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads
actual_lm = l2;
max_mc = 576;
}

Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
if (mc > Traits::mr) mc -= mc % Traits::mr;

else if (mc==0) return;
m = (m%mc)==0 ? mc
: (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));
}
}
}

inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
{
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
return true;
}
#else
EIGEN_UNUSED_VARIABLE(k)
EIGEN_UNUSED_VARIABLE(m)
EIGEN_UNUSED_VARIABLE(n)
#endif
return false;
}

/** \brief Computes the blocking parameters for a m x k times k x n matrix product
*
* \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
* \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
* \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
*
* Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
* this function computes the blocking size parameters along the respective dimensions
* for matrix products and related algorithms.
*
* The blocking size parameters may be evaluated:
* - either by a heuristic based on cache sizes;
* - or using a precomputed lookup table;
* - or using fixed prescribed values (for testing purposes).
*
* \sa setCpuCacheSizes */

template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
if (!useSpecificBlockingSizes(k, m, n)) {
if (!lookupBlockingSizesFromTable<LhsScalar, RhsScalar>(k, m, n, num_threads)) {
evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor>(k, m, n, num_threads);
}
}

typedef gebp_traits<LhsScalar,RhsScalar> Traits;
enum {
kr = 8,
mr = Traits::mr,
nr = Traits::nr
};
if (k > kr) k -= k % kr;
if (m > mr) m -= m % mr;
if (n > nr) n -= n % nr;
}

template<typename LhsScalar, typename RhsScalar>
inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
{
@@ -339,11 +380,14 @@ public:
nr = 4,

// register block size along the M direction (currently, this one cannot be modified)
default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
// we assume 16 registers
mr = 3*LhsPacketSize,
// See bug 992, if the scalar type is not vectorizable but that EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined,
// then using 3*LhsPacketSize triggers non-implemented paths in syrk.
mr = Vectorizable ? 3*LhsPacketSize : default_mr,
#else
mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
mr = default_mr,
#endif

LhsProgress = LhsPacketSize,
@@ -974,12 +1018,11 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
// However, if depth is too small, we can extend the number of rows of these horizontal panels.
// This actual number of rows is computed as follow:
const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
#else
const Index actual_panel_rows = (3*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) );
#endif
for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
{
const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
@@ -1211,12 +1254,12 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
//---------- Process 2 * LhsProgress rows at once ----------
if(mr>=2*Traits::LhsProgress)
{
const Index l1 = 32*1024; // in Bytes, TODO, l1 should be passed to this function.
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
// The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
#else
Index actual_panel_rows = (2*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) );
#endif

for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
{
Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
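A sketch of how the refactored entry point is typically called (internal Eigen API; this assumes the development branch where Eigen::Index is the index type expected by these functions):

#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::Index k = 2000, m = 1500, n = 1000;   // dimensions of an m x k times k x n product
  // Selection order after this commit: explicit test sizes (if enabled),
  // then the per-architecture lookup table, then the cache-size heuristic.
  Eigen::internal::computeProductBlockingSizes<float, float>(k, m, n, /*num_threads=*/1);
  std::cout << "kc=" << k << " mc=" << m << " nc=" << n << "\n";
}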
Eigen/src/Core/products/LookupBlockingSizesTable.h (new file, 97 lines)
@@ -0,0 +1,97 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
#define EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H

namespace Eigen {

namespace internal {

template <typename LhsScalar,
typename RhsScalar,
bool HasLookupTable = BlockingSizesLookupTable<LhsScalar, RhsScalar>::NumSizes != 0 >
struct LookupBlockingSizesFromTableImpl
{
static bool run(Index&, Index&, Index&, Index)
{
return false;
}
};

inline size_t floor_log2_helper(unsigned short& x, size_t offset)
{
unsigned short y = x >> offset;
if (y) {
x = y;
return offset;
} else {
return 0;
}
}

inline size_t floor_log2(unsigned short x)
{
return floor_log2_helper(x, 8)
+ floor_log2_helper(x, 4)
+ floor_log2_helper(x, 2)
+ floor_log2_helper(x, 1);
}

inline size_t ceil_log2(unsigned short x)
{
return x > 1 ? floor_log2(x - 1) + 1 : 0;
}

template <typename LhsScalar,
typename RhsScalar>
struct LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar, true>
{
static bool run(Index& k, Index& m, Index& n, Index)
{
using std::min;
using std::max;
typedef BlockingSizesLookupTable<LhsScalar, RhsScalar> Table;
const unsigned short minsize = Table::BaseSize;
const unsigned short maxsize = minsize << (Table::NumSizes - 1);
const unsigned short k_clamped = max<unsigned short>(minsize, min<Index>(k, maxsize));
const unsigned short m_clamped = max<unsigned short>(minsize, min<Index>(m, maxsize));
const unsigned short n_clamped = max<unsigned short>(minsize, min<Index>(n, maxsize));
const size_t k_index = ceil_log2(k_clamped / minsize);
const size_t m_index = ceil_log2(m_clamped / minsize);
const size_t n_index = ceil_log2(n_clamped / minsize);
const size_t index = n_index + Table::NumSizes * (m_index + Table::NumSizes * k_index);
const unsigned short table_entry = Table::Data()[index];
k = min<Index>(k, 1 << ((table_entry & 0xf00) >> 8));
m = min<Index>(m, 1 << ((table_entry & 0x0f0) >> 4));
n = min<Index>(n, 1 << ((table_entry & 0x00f) >> 0));
return true;
}
};

template <typename LhsScalar,
typename RhsScalar>
bool lookupBlockingSizesFromTable(Index& k, Index& m, Index& n, Index num_threads)
{
if (num_threads > 1) {
// We don't currently have lookup tables recorded for multithread performance,
// and we have confirmed experimentally that our single-thread-recorded LUTs are
// poor for multithread performance, and our LUTs don't currently contain
// any annotation about multithread status (FIXME - we need that).
// So for now, we just early-return here.
return false;
}
return LookupBlockingSizesFromTableImpl<LhsScalar, RhsScalar>::run(k, m, n, num_threads);
}

}

}

#endif // EIGEN_LOOKUP_BLOCKING_SIZES_TABLE_H
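Per the decoder above, each 12-bit table entry packs log2 of the k, m and n block sizes into three nibbles (high to low), and the looked-up values are then only used as upper bounds via min. A standalone decoding sketch for one entry that appears in the NEON float table:

#include <iostream>

int main() {
  const unsigned short entry = 0x496;          // one value from the NEON float table
  const int kc = 1 << ((entry & 0xf00) >> 8);  // 1 << 4 = 16
  const int mc = 1 << ((entry & 0x0f0) >> 4);  // 1 << 9 = 512
  const int nc = 1 << ((entry & 0x00f) >> 0);  // 1 << 6 = 64
  std::cout << "kc=" << kc << " mc=" << mc << " nc=" << nc << "\n";
}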
@@ -214,7 +214,7 @@ class blas_data_mapper {
}

template<typename SubPacket>
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, SubPacket p) const {
EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}
@@ -287,6 +287,14 @@ struct stem_function
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef ComplexScalar type(ComplexScalar, int);
};

template <typename LhsScalar,
typename RhsScalar>
struct BlockingSizesLookupTable
{
static const size_t NumSizes = 0;
};

}

} // end namespace Eigen
@@ -213,7 +213,8 @@
#endif

/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
#if defined(__ANDROID__)
// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
#if defined(__ANDROID__) || defined(ANDROID)
#define EIGEN_OS_ANDROID 1
#else
#define EIGEN_OS_ANDROID 0
@@ -318,6 +319,9 @@
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES,
// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough,
// and 16 bytes alignment is also enough for Vector4f.
#define EIGEN_ALIGN_BYTES 16

#ifdef EIGEN_DONT_ALIGN
@@ -159,13 +159,16 @@ class compute_matrix_evaluator_flags
enum {
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,

// TODO: should check for smaller packet types once we can handle multi-sized packet types
align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),

aligned_bit =
(
((Options&DontAlign)==0)
&& (
#if EIGEN_ALIGN_STATICALLY
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
#else
0
#endif
@@ -234,6 +234,12 @@ template<typename _MatrixType> class ComplexEigenSolver
}

protected:

static void check_template_parameters()
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
}

EigenvectorType m_eivec;
EigenvalueType m_eivalues;
ComplexSchur<MatrixType> m_schur;
@@ -251,6 +257,8 @@ template<typename MatrixType>
ComplexEigenSolver<MatrixType>&
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
check_template_parameters();

// this code is inspired from Jampack
eigen_assert(matrix.cols() == matrix.rows());
@ -299,6 +299,13 @@ template<typename _MatrixType> class EigenSolver
|
||||
void doComputeEigenvectors();
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
|
||||
}
|
||||
|
||||
MatrixType m_eivec;
|
||||
EigenvalueType m_eivalues;
|
||||
bool m_isInitialized;
|
||||
@ -366,6 +373,8 @@ template<typename MatrixType>
|
||||
EigenSolver<MatrixType>&
|
||||
EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
using numext::isfinite;
|
||||
@ -408,7 +417,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
|
||||
{
|
||||
Scalar t0 = m_matT.coeff(i+1, i);
|
||||
Scalar t1 = m_matT.coeff(i, i+1);
|
||||
Scalar maxval = numext::maxi(abs(p),numext::maxi(abs(t0),abs(t1)));
|
||||
Scalar maxval = numext::maxi<Scalar>(abs(p),numext::maxi<Scalar>(abs(t0),abs(t1)));
|
||||
t0 /= maxval;
|
||||
t1 /= maxval;
|
||||
Scalar p0 = p/maxval;
|
||||
@ -599,7 +608,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
|
||||
}
|
||||
|
||||
// Overflow control
|
||||
Scalar t = numext::maxi(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
|
||||
Scalar t = numext::maxi<Scalar>(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
|
||||
if ((eps * t) * t > Scalar(1))
|
||||
m_matT.block(i, n-1, size-i, 2) /= t;
|
||||
|
||||
|
@ -263,6 +263,13 @@ template<typename _MatrixType> class GeneralizedEigenSolver
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
|
||||
}
|
||||
|
||||
MatrixType m_eivec;
|
||||
ComplexVectorType m_alphas;
|
||||
VectorType m_betas;
|
||||
@ -290,6 +297,8 @@ template<typename MatrixType>
|
||||
GeneralizedEigenSolver<MatrixType>&
|
||||
GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());
|
||||
|
@ -240,10 +240,10 @@ namespace Eigen {
|
||||
m_S.coeffRef(i,j) = Scalar(0.0);
|
||||
m_S.rightCols(dim-j-1).applyOnTheLeft(i-1,i,G.adjoint());
|
||||
m_T.rightCols(dim-i+1).applyOnTheLeft(i-1,i,G.adjoint());
|
||||
// update Q
|
||||
if (m_computeQZ)
|
||||
m_Q.applyOnTheRight(i-1,i,G);
|
||||
}
|
||||
// update Q
|
||||
if (m_computeQZ)
|
||||
m_Q.applyOnTheRight(i-1,i,G);
|
||||
// kill T(i,i-1)
|
||||
if(m_T.coeff(i,i-1)!=Scalar(0))
|
||||
{
|
||||
@ -251,10 +251,10 @@ namespace Eigen {
|
||||
m_T.coeffRef(i,i-1) = Scalar(0.0);
|
||||
m_S.applyOnTheRight(i,i-1,G);
|
||||
m_T.topRows(i).applyOnTheRight(i,i-1,G);
|
||||
// update Z
|
||||
if (m_computeQZ)
|
||||
m_Z.applyOnTheLeft(i,i-1,G.adjoint());
|
||||
}
|
||||
// update Z
|
||||
if (m_computeQZ)
|
||||
m_Z.applyOnTheLeft(i,i-1,G.adjoint());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -347,6 +347,11 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
|
||||
static const int m_maxIterations = 30;
|
||||
|
||||
protected:
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
MatrixType m_eivec;
|
||||
RealVectorType m_eivalues;
|
||||
typename TridiagonalizationType::SubDiagonalType m_subdiag;
|
||||
@ -382,6 +387,8 @@ EIGEN_DEVICE_FUNC
|
||||
SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
|
||||
::compute(const MatrixType& matrix, int options)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
using std::abs;
|
||||
eigen_assert(matrix.cols() == matrix.rows());
|
||||
eigen_assert((options&~(EigVecMask|GenEigMask))==0
|
||||
|
@ -161,8 +161,8 @@ class QuaternionBase : public RotationBase<Derived, 3>
|
||||
bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
|
||||
{ return coeffs().isApprox(other.coeffs(), prec); }
|
||||
|
||||
/** return the result vector of \a v through the rotation*/
|
||||
EIGEN_STRONG_INLINE Vector3 _transformVector(Vector3 v) const;
|
||||
/** return the result vector of \a v through the rotation*/
|
||||
EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
|
||||
|
||||
/** \returns \c *this with scalar type casted to \a NewScalarType
|
||||
*
|
||||
@ -462,7 +462,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni
|
||||
*/
|
||||
template <class Derived>
|
||||
EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
|
||||
QuaternionBase<Derived>::_transformVector(Vector3 v) const
|
||||
QuaternionBase<Derived>::_transformVector(const Vector3& v) const
|
||||
{
|
||||
// Note that this algorithm comes from the optimization by hand
|
||||
// of the conversion to a Matrix followed by a Matrix/Vector product.
|
||||
|
@ -390,6 +390,12 @@ template<typename _MatrixType> class FullPivLU
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
MatrixType m_lu;
|
||||
PermutationPType m_p;
|
||||
PermutationQType m_q;
|
||||
@ -434,6 +440,8 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
|
||||
template<typename MatrixType>
|
||||
FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
// the permutations are stored as int indices, so just to be sure:
|
||||
eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
|
||||
|
||||
|
@ -209,6 +209,12 @@ template<typename _MatrixType> class PartialPivLU
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
MatrixType m_lu;
|
||||
PermutationType m_p;
|
||||
TranspositionType m_rowsTranspositions;
|
||||
@ -425,6 +431,8 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
|
||||
template<typename MatrixType>
|
||||
PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
// the row permutation is stored as int indices, so just to be sure:
|
||||
eigen_assert(matrix.rows()<NumTraits<int>::highest());
|
||||
|
||||
|
@ -137,22 +137,27 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm
|
||||
degree[i] = len[i]; // degree of node i
|
||||
}
|
||||
mark = internal::cs_wclear<StorageIndex>(0, 0, w, n); /* clear w */
|
||||
elen[n] = -2; /* n is a dead element */
|
||||
Cp[n] = -1; /* n is a root of assembly tree */
|
||||
w[n] = 0; /* n is a dead element */
|
||||
|
||||
/* --- Initialize degree lists ------------------------------------------ */
|
||||
for(i = 0; i < n; i++)
|
||||
{
|
||||
bool has_diag = false;
|
||||
for(p = Cp[i]; p<Cp[i+1]; ++p)
|
||||
if(Ci[p]==i)
|
||||
{
|
||||
has_diag = true;
|
||||
break;
|
||||
}
|
||||
|
||||
d = degree[i];
|
||||
if(d == 0) /* node i is empty */
|
||||
if(d == 1) /* node i is empty */
|
||||
{
|
||||
elen[i] = -2; /* element i is dead */
|
||||
nel++;
|
||||
Cp[i] = -1; /* i is a root of assembly tree */
|
||||
w[i] = 0;
|
||||
}
|
||||
else if(d > dense) /* node i is dense */
|
||||
else if(d > dense || !has_diag) /* node i is dense or has no structural diagonal element */
|
||||
{
|
||||
nv[i] = 0; /* absorb i into element n */
|
||||
elen[i] = -1; /* node i is dead */
|
||||
@ -168,6 +173,10 @@ void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,StorageIndex>& C, Perm
|
||||
}
|
||||
}
|
||||
|
||||
elen[n] = -2; /* n is a dead element */
|
||||
Cp[n] = -1; /* n is a root of assembly tree */
|
||||
w[n] = 0; /* n is a dead element */
|
||||
|
||||
while (nel < n) /* while (selecting pivots) do */
|
||||
{
|
||||
/* --- Select node of minimum approximate degree -------------------- */
|
||||
|
@ -398,6 +398,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
MatrixType m_qr;
|
||||
HCoeffsType m_hCoeffs;
|
||||
PermutationType m_colsPermutation;
|
||||
@ -436,6 +442,8 @@ typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::logAbsDetermina
|
||||
template<typename MatrixType>
|
||||
ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
using std::abs;
|
||||
Index rows = matrix.rows();
|
||||
Index cols = matrix.cols();
|
||||
|
@ -380,6 +380,12 @@ template<typename _MatrixType> class FullPivHouseholderQR
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
MatrixType m_qr;
|
||||
HCoeffsType m_hCoeffs;
|
||||
IntDiagSizeVectorType m_rows_transpositions;
|
||||
@ -419,6 +425,8 @@ typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::logAbsDetermin
|
||||
template<typename MatrixType>
|
||||
FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(const MatrixType& matrix)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
using std::abs;
|
||||
Index rows = matrix.rows();
|
||||
Index cols = matrix.cols();
|
||||
|
@ -196,6 +196,12 @@ template<typename _MatrixType> class HouseholderQR
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
MatrixType m_qr;
|
||||
HCoeffsType m_hCoeffs;
|
||||
RowVectorType m_temp;
|
||||
@ -348,6 +354,8 @@ void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) c
|
||||
template<typename MatrixType>
|
||||
HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType& matrix)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
Index rows = matrix.rows();
|
||||
Index cols = matrix.cols();
|
||||
Index size = (std::min)(rows,cols);
|
||||
|
@ -84,6 +84,8 @@ public:
|
||||
typedef Matrix<RealScalar, Dynamic, 1> VectorType;
|
||||
typedef Array<RealScalar, Dynamic, 1> ArrayXr;
|
||||
typedef Array<Index,1,Dynamic> ArrayXi;
|
||||
typedef Ref<ArrayXr> ArrayRef;
|
||||
typedef Ref<ArrayXi> IndicesRef;
|
||||
|
||||
/** \brief Default Constructor.
|
||||
*
|
||||
@ -159,21 +161,23 @@ private:
|
||||
void allocate(Index rows, Index cols, unsigned int computationOptions);
|
||||
void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
|
||||
void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
|
||||
void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, VectorType& singVals, ArrayXr& shifts, ArrayXr& mus);
|
||||
void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat);
|
||||
void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi& perm, const VectorType& singVals, const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V);
|
||||
void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus);
|
||||
void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat);
|
||||
void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V);
|
||||
void deflation43(Index firstCol, Index shift, Index i, Index size);
|
||||
void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
|
||||
void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
|
||||
template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
|
||||
void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
|
||||
static void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
|
||||
static RealScalar secularEq(RealScalar x, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift);
|
||||
void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
|
||||
static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift);
|
||||
|
||||
protected:
|
||||
MatrixXr m_naiveU, m_naiveV;
|
||||
MatrixXr m_computed;
|
||||
Index m_nRec;
|
||||
ArrayXr m_workspace;
|
||||
ArrayXi m_workspaceI;
|
||||
int m_algoswap;
|
||||
bool m_isTranspose, m_compU, m_compV;
|
||||
|
||||
@ -212,6 +216,9 @@ void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computati
|
||||
else m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );
|
||||
|
||||
if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);
|
||||
|
||||
m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);
|
||||
m_workspaceI.resize(3*m_diagSize);
|
||||
}// end allocate
|
||||
|
||||
template<typename MatrixType>
|
||||
@ -223,6 +230,19 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
  allocate(matrix.rows(), matrix.cols(), computationOptions);
  using std::abs;

  //**** step -1 - If the problem is too small, directly fall back to JacobiSVD and return
  if(matrix.cols() < m_algoswap)
  {
    // FIXME this line involves temporaries
    JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
    if(computeU()) m_matrixU = jsvd.matrixU();
    if(computeV()) m_matrixV = jsvd.matrixV();
    m_singularValues = jsvd.singularValues();
    m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
    m_isInitialized = true;
    return *this;
  }

  //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
  RealScalar scale = matrix.cwiseAbs().maxCoeff();
  if(scale==RealScalar(0)) scale = RealScalar(1);
@ -231,11 +251,13 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
|
||||
else copy = matrix/scale;
|
||||
|
||||
//**** step 1 - Bidiagonalization
|
||||
// FIXME this line involves temporaries
|
||||
internal::UpperBidiagonalization<MatrixX> bid(copy);
|
||||
|
||||
//**** step 2 - Divide & Conquer
|
||||
m_naiveU.setZero();
|
||||
m_naiveV.setZero();
|
||||
// FIXME this line involves a temporary matrix
|
||||
m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
|
||||
m_computed.template bottomRows<1>().setZero();
|
||||
divide(0, m_diagSize - 1, 0, 0, 0);
|
||||
@ -257,6 +279,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
|
||||
// std::cout << "m_naiveU\n" << m_naiveU << "\n\n";
|
||||
// std::cout << "m_naiveV\n" << m_naiveV << "\n\n";
|
||||
@ -279,14 +302,14 @@ void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const Househol
|
||||
Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();
|
||||
m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
|
||||
m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
|
||||
householderU.applyThisOnTheLeft(m_matrixU);
|
||||
householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer
|
||||
}
|
||||
if (computeV())
|
||||
{
|
||||
Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();
|
||||
m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
|
||||
m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
|
||||
householderV.applyThisOnTheLeft(m_matrixV);
|
||||
householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer
|
||||
}
|
||||
}
|
||||
|
||||
@ -307,7 +330,10 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
|
||||
// If the matrices are large enough, let's exploit the sparse structure of A by
|
||||
// splitting it in half (wrt n1), and packing the non-zero columns.
|
||||
Index n2 = n - n1;
|
||||
MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n);
|
||||
Map<MatrixXr> A1(m_workspace.data() , n1, n);
|
||||
Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n);
|
||||
Map<MatrixXr> B1(m_workspace.data()+ n*n, n, n);
|
||||
Map<MatrixXr> B2(m_workspace.data()+2*n*n, n, n);
|
||||
Index k1=0, k2=0;
|
||||
for(Index j=0; j<n; ++j)
|
||||
{
|
||||
@ -329,7 +355,11 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
|
||||
A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
|
||||
}
|
||||
else
|
||||
A *= B; // FIXME this requires a temporary
|
||||
{
|
||||
Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n);
|
||||
tmp.noalias() = A*B;
|
||||
A = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
// The divide algorithm is done "in place": we are always working on subsets of the same matrix. The divide method takes as arguments the
|
||||
@ -360,7 +390,8 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
|
||||
// matrices.
|
||||
if (n < m_algoswap)
|
||||
{
|
||||
JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0)) ;
|
||||
// FIXME this line involves temporaries
|
||||
JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
|
||||
if (m_compU)
|
||||
m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
|
||||
else
|
||||
@ -438,7 +469,7 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
|
||||
}
|
||||
else
|
||||
{
|
||||
RealScalar q1 = (m_naiveU(0, firstCol + k));
|
||||
RealScalar q1 = m_naiveU(0, firstCol + k);
|
||||
// we shift Q1 to the right
|
||||
for (Index i = firstCol + k - 1; i >= firstCol; i--)
|
||||
m_naiveU(0, i + 1) = m_naiveU(0, i);
|
||||
@ -491,8 +522,14 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
|
||||
assert(VofSVD.allFinite());
|
||||
#endif
|
||||
|
||||
if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
|
||||
else m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time
|
||||
if (m_compU)
|
||||
structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
|
||||
else
|
||||
{
|
||||
Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1);
|
||||
tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD;
|
||||
m_naiveU.middleCols(firstCol, n + 1) = tmp;
|
||||
}
|
||||
|
||||
if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);
|
||||
|
||||
@ -517,10 +554,9 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
|
||||
template <typename MatrixType>
|
||||
void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
|
||||
{
|
||||
// TODO Get rid of these copies (?)
|
||||
// FIXME at least preallocate them
|
||||
ArrayXr col0 = m_computed.col(firstCol).segment(firstCol, n);
|
||||
ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal();
|
||||
ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
|
||||
m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
|
||||
ArrayRef diag = m_workspace.head(n);
|
||||
diag(0) = 0;
|
||||
|
||||
// Allocate space for singular values and vectors
|
||||
@ -539,13 +575,14 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
|
||||
Index actual_n = n;
|
||||
while(actual_n>1 && diag(actual_n-1)==0) --actual_n;
|
||||
Index m = 0; // size of the deflated problem
|
||||
ArrayXi perm(actual_n);
|
||||
for(Index k=0;k<actual_n;++k)
|
||||
if(col0(k)!=0)
|
||||
perm(m++) = k;
|
||||
perm.conservativeResize(m);
|
||||
m_workspaceI(m++) = k;
|
||||
Map<ArrayXi> perm(m_workspaceI.data(),m);
|
||||
|
||||
ArrayXr shifts(n), mus(n), zhat(n);
|
||||
Map<ArrayXr> shifts(m_workspace.data()+1*n, n);
|
||||
Map<ArrayXr> mus(m_workspace.data()+2*n, n);
|
||||
Map<ArrayXr> zhat(m_workspace.data()+3*n, n);
|
||||
|
||||
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
|
||||
std::cout << "computeSVDofM using:\n";
|
||||
@ -622,8 +659,8 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
|
||||
  // Reverse order so that singular values are in increasing order
  // Because of deflation, the zero singular values are already at the end
|
||||
singVals.head(actual_n).reverseInPlace();
|
||||
U.leftCols(actual_n) = U.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary
|
||||
if (m_compV) V.leftCols(actual_n) = V.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary
|
||||
U.leftCols(actual_n).rowwise().reverseInPlace();
|
||||
if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace();
|
||||
|
||||
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
|
||||
JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) );
|
||||
@ -634,7 +671,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
|
||||
}
|
||||
|
||||
template <typename MatrixType>
|
||||
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const ArrayXr& diagShifted, RealScalar shift)
|
||||
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
|
||||
{
|
||||
Index m = perm.size();
|
||||
RealScalar res = 1;
|
||||
@ -647,8 +684,8 @@ typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar
|
||||
}
|
||||
|
||||
template <typename MatrixType>
|
||||
void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm,
|
||||
VectorType& singVals, ArrayXr& shifts, ArrayXr& mus)
|
||||
void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm,
|
||||
VectorType& singVals, ArrayRef shifts, ArrayRef mus)
|
||||
{
|
||||
using std::abs;
|
||||
using std::swap;
|
||||
@ -703,7 +740,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
|
||||
RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right;
|
||||
|
||||
// measure everything relative to shift
|
||||
ArrayXr diagShifted = diag - shift;
|
||||
Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
|
||||
diagShifted = diag - shift;
|
||||
|
||||
// initial guess
|
||||
RealScalar muPrev, muCur;
|
||||
@ -730,7 +768,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
|
||||
// rational interpolation: fit a function of the form a / mu + b through the two previous
|
||||
// iterates and use its zero to compute the next iterate
|
||||
bool useBisection = fPrev*fCur>0;
|
||||
while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
|
||||
while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
|
||||
{
|
||||
++m_numIters;
|
||||
|
||||
@ -773,7 +811,10 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
|
||||
}
|
||||
|
||||
RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
|
||||
|
||||
#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE
|
||||
RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
|
||||
if(!(fLeft * fRight<0))
|
||||
@ -781,14 +822,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
|
||||
#endif
|
||||
eigen_internal_assert(fLeft * fRight < 0);
|
||||
|
||||
while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted)))
|
||||
while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
|
||||
{
|
||||
RealScalar midShifted = (leftShifted + rightShifted) / 2;
|
||||
RealScalar fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
|
||||
if (fLeft * fMid < 0)
|
||||
{
|
||||
rightShifted = midShifted;
|
||||
fRight = fMid;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -816,8 +856,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& dia
|
||||
// zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)
|
||||
template <typename MatrixType>
|
||||
void BDCSVD<MatrixType>::perturbCol0
|
||||
(const ArrayXr& col0, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals,
|
||||
const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat)
|
||||
(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
|
||||
const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat)
|
||||
{
|
||||
using std::sqrt;
|
||||
Index n = col0.size();
|
||||
@ -865,8 +905,8 @@ void BDCSVD<MatrixType>::perturbCol0
|
||||
// compute singular vectors
|
||||
template <typename MatrixType>
|
||||
void BDCSVD<MatrixType>::computeSingVecs
|
||||
(const ArrayXr& zhat, const ArrayXr& diag, const ArrayXi &perm, const VectorType& singVals,
|
||||
const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V)
|
||||
(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
|
||||
const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V)
|
||||
{
|
||||
Index n = zhat.size();
|
||||
Index m = perm.size();
|
||||
@ -991,7 +1031,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
|
||||
|
||||
RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
|
||||
RealScalar epsilon_strict = NumTraits<RealScalar>::epsilon() * maxDiag;
|
||||
RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag);
|
||||
RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
|
||||
|
||||
#ifdef EIGEN_BDCSVD_SANITY_CHECKS
|
||||
assert(m_naiveU.allFinite());
|
||||
@ -1047,7 +1087,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
|
||||
|
||||
// Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
|
||||
// First, compute the respective permutation.
|
||||
Index *permutation = new Index[length]; // FIXME avoid repeated dynamic memory allocation
|
||||
Index *permutation = m_workspaceI.data();
|
||||
{
|
||||
permutation[0] = 0;
|
||||
Index p = 1;
|
||||
@ -1084,8 +1124,8 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
|
||||
}
|
||||
|
||||
// Current index of each col, and current column of each index
|
||||
Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation
|
||||
Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation
|
||||
Index *realInd = m_workspaceI.data()+length;
|
||||
Index *realCol = m_workspaceI.data()+2*length;
|
||||
|
||||
for(int pos = 0; pos< length; pos++)
|
||||
{
|
||||
@ -1115,9 +1155,6 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
|
||||
realInd[J] = realI;
|
||||
realInd[i] = pi;
|
||||
}
|
||||
delete[] permutation;
|
||||
delete[] realInd;
|
||||
delete[] realCol;
|
||||
}
|
||||
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
|
||||
std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n";
|
||||
|
@ -425,12 +425,13 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
|
||||
// If d!=0, then t/d cannot overflow because the magnitude of the
|
||||
// entries forming d are not too small compared to the ones forming t.
|
||||
RealScalar u = t / d;
|
||||
rot1.s() = RealScalar(1) / sqrt(RealScalar(1) + numext::abs2(u));
|
||||
rot1.c() = rot1.s() * u;
|
||||
RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
|
||||
rot1.s() = RealScalar(1) / tmp;
|
||||
rot1.c() = u / tmp;
|
||||
}
|
||||
m.applyOnTheLeft(0,1,rot1);
|
||||
j_right->makeJacobi(m,0,1);
|
||||
*j_left = rot1 * j_right->transpose();
|
||||
*j_left = rot1 * j_right->transpose();
|
||||
}
|
||||
|
||||
template<typename _MatrixType, int QRPreconditioner>
|
||||
@ -680,6 +681,8 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
  const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();

  // limit for very small denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
  // FIXME What about considering any denormal numbers as zero, using:
  // const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
  const RealScalar considerAsZero = RealScalar(2) * std::numeric_limits<RealScalar>::denorm_min();

  // Scaling factor to reduce over/under-flows
@ -719,8 +722,9 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
        // if this 2x2 sub-matrix is not diagonal already...
        // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
        // keep us iterating forever. Similarly, small denormal numbers are considered zero.
        RealScalar threshold = numext::maxi(considerAsZero, precision * numext::maxi(abs(m_workMatrix.coeff(p,p)),
                                                                                     abs(m_workMatrix.coeff(q,q))));
        RealScalar threshold = numext::maxi<RealScalar>(considerAsZero,
                                                        precision * numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)),
                                                                                             abs(m_workMatrix.coeff(q,q))));
        // We compare both values to threshold instead of calling max to be robust to NaN (See bug 791)
        if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
        {
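A note on the change above: numext::maxi now receives an explicit <RealScalar> template argument so that both operands are converted to one scalar type; with mixed argument types, template argument deduction would be ambiguous. A generic sketch of the failure mode (maxi_sketch is illustrative, not Eigen's implementation):

template<typename T> T maxi_sketch(const T& a, const T& b) { return a < b ? b : a; }

void deduction_example(float x, double y)
{
  // maxi_sketch(x, y);          // ill-formed: T deduced as float and double simultaneously
  maxi_sketch<double>(x, y);     // explicit argument fixes T, x is converted to double
}
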
@ -130,9 +130,10 @@ public:
|
||||
inline Index rank() const
|
||||
{
|
||||
using std::abs;
|
||||
using std::max;
|
||||
eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
|
||||
if(m_singularValues.size()==0) return 0;
|
||||
RealScalar premultiplied_threshold = m_singularValues.coeff(0) * threshold();
|
||||
RealScalar premultiplied_threshold = (max)(m_singularValues.coeff(0) * threshold(), (std::numeric_limits<RealScalar>::min)());
|
||||
Index i = m_nonzeroSingularValues-1;
|
||||
while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
|
||||
return i+1;
|
||||
@ -217,6 +218,12 @@ public:
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
// return true if already allocated
|
||||
bool allocate(Index rows, Index cols, unsigned int computationOptions) ;
|
||||
|
||||
@ -240,7 +247,9 @@ protected:
|
||||
m_usePrescribedThreshold(false),
|
||||
m_computationOptions(0),
|
||||
m_rows(-1), m_cols(-1), m_diagSize(0)
|
||||
{}
|
||||
{
|
||||
check_template_parameters();
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
@ -86,7 +86,12 @@ class CompressedStorage
|
||||
void resize(Index size, double reserveSizeFactor = 0)
|
||||
{
|
||||
if (m_allocatedSize<size)
|
||||
reallocate(size + Index(reserveSizeFactor*double(size)));
|
||||
{
|
||||
Index realloc_size = (std::min<Index>)(NumTraits<StorageIndex>::highest(), size + Index(reserveSizeFactor*double(size)));
|
||||
if(realloc_size<size)
|
||||
internal::throw_std_bad_alloc();
|
||||
reallocate(realloc_size);
|
||||
}
|
||||
m_size = size;
|
||||
}
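The new resize() clamps the grown capacity to the largest value a StorageIndex can represent and reports an allocation failure when even that cannot hold the requested size. A standalone sketch of the same guard, assuming a 32-bit index type (names hypothetical, not the class member itself):

#include <algorithm>
#include <cstddef>
#include <limits>
#include <new>

std::ptrdiff_t grown_capacity(std::ptrdiff_t size, double reserveSizeFactor)
{
  const std::ptrdiff_t max_index = std::numeric_limits<int>::max();  // assumed 32-bit StorageIndex
  const std::ptrdiff_t wanted  = size + std::ptrdiff_t(reserveSizeFactor * double(size));
  const std::ptrdiff_t clamped = std::min(max_index, wanted);
  if (clamped < size)
    throw std::bad_alloc();  // not even the index type's maximum can hold 'size'
  return clamped;
}
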
|
||||
|
||||
|
@ -30,16 +30,16 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
|
||||
|
||||
std::memset(mask,0,sizeof(bool)*rows);
|
||||
|
||||
typename evaluator<Lhs>::type lhsEval(lhs);
|
||||
typename evaluator<Rhs>::type rhsEval(rhs);
|
||||
|
||||
  // estimate the number of non zero entries
  // given a rhs column containing Y non zeros, we assume that the respective Y columns
  // of the lhs differ on average by one non zero, thus the number of non zeros for
  // the product of a rhs column with the lhs is X+Y where X is the average number of non zeros
  // per column of the lhs.
  // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
|
||||
|
||||
typename evaluator<Lhs>::type lhsEval(lhs);
|
||||
typename evaluator<Rhs>::type rhsEval(rhs);
|
||||
Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();
|
||||
|
||||
res.setZero();
|
||||
res.reserve(Index(estimated_nnz_prod));
|
||||
|
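Switching from nonZeros() to the evaluators' nonZerosEstimate() matters because, for lazy expressions and blocks, the exact count may be unknown or expensive, and the value is only used to size reserve(). A rough sketch of the proportional estimate used for blocks elsewhere in this changeset (standalone and illustrative only):

// Estimate a block's nonzeros as a size-proportional share of its parent matrix.
Index estimateBlockNonZeros(Index parent_nnz, Index block_size, Index parent_size)
{
  return parent_nnz * block_size / parent_size;  // e.g. 1000 nnz over a quarter of the matrix -> ~250
}
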
@ -49,6 +49,16 @@ public:
|
||||
return nnz;
|
||||
}
|
||||
|
||||
inline const Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
|
||||
}
|
||||
|
||||
inline const Scalar coeff(Index index) const
|
||||
{
|
||||
return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
|
||||
}
|
||||
|
||||
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
|
||||
Index startRow() const { return IsRowMajor ? m_outerStart : 0; }
|
||||
Index startCol() const { return IsRowMajor ? 0 : m_outerStart; }
|
||||
@ -80,7 +90,8 @@ class sparse_matrix_block_impl
|
||||
typedef Block<SparseMatrixType, BlockRows, BlockCols, true> BlockType;
|
||||
public:
|
||||
enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
|
||||
EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
|
||||
typedef SparseCompressedBase<Block<SparseMatrixType,BlockRows,BlockCols,true> > Base;
|
||||
_EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
|
||||
protected:
|
||||
typedef typename Base::IndexVector IndexVector;
|
||||
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
|
||||
@ -188,27 +199,31 @@ public:
|
||||
{ return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; }
|
||||
|
||||
inline const StorageIndex* innerNonZeroPtr() const
|
||||
{ return isCompressed() ? 0 : m_matrix.innerNonZeroPtr(); }
|
||||
{ return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
|
||||
inline StorageIndex* innerNonZeroPtr()
|
||||
{ return isCompressed() ? 0 : m_matrix.const_cast_derived().innerNonZeroPtr(); }
|
||||
|
||||
Index nonZeros() const
|
||||
{
|
||||
if(m_matrix.isCompressed())
|
||||
return ( (m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()])
|
||||
- (m_matrix.outerIndexPtr()[m_outerStart]));
|
||||
else if(m_outerSize.value()==0)
|
||||
return 0;
|
||||
else
|
||||
return Map<const IndexVector>(m_matrix.innerNonZeroPtr()+m_outerStart, m_outerSize.value()).sum();
|
||||
}
|
||||
{ return isCompressed() ? 0 : (m_matrix.const_cast_derived().innerNonZeroPtr()+m_outerStart); }
|
||||
|
||||
bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
|
||||
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
|
||||
}
|
||||
|
||||
inline const Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
|
||||
}
|
||||
|
||||
inline const Scalar coeff(Index index) const
|
||||
{
|
||||
return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart);
|
||||
}
|
||||
|
||||
const Scalar& lastCoeff() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(sparse_matrix_block_impl);
|
||||
eigen_assert(nonZeros()>0);
|
||||
eigen_assert(Base::nonZeros()>0);
|
||||
if(m_matrix.isCompressed())
|
||||
return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1];
|
||||
else
|
||||
@ -314,17 +329,6 @@ SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) const
|
||||
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template< typename XprType, int BlockRows, int BlockCols, bool InnerPanel,
|
||||
bool OuterVector = (BlockCols==1 && XprType::IsRowMajor)
|
||||
| // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
|
||||
// revert to || as soon as not needed anymore.
|
||||
(BlockRows==1 && !XprType::IsRowMajor)>
|
||||
class GenericSparseBlockInnerIteratorImpl;
|
||||
|
||||
}
|
||||
|
||||
/** Generic implementation of sparse Block expression.
|
||||
* Real-only.
|
||||
*/
|
||||
@ -390,8 +394,11 @@ public:
|
||||
Index blockCols() const { return m_blockCols.value(); }
|
||||
|
||||
protected:
|
||||
friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
|
||||
// friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
|
||||
friend class ReverseInnerIterator;
|
||||
friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
|
||||
|
||||
Index nonZeros() const { return Dynamic; }
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
|
||||
|
||||
@ -404,94 +411,6 @@ public:
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,false> : public Block<XprType, BlockRows, BlockCols, InnerPanel>::_MatrixTypeNested::InnerIterator
|
||||
{
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
enum {
|
||||
IsRowMajor = BlockType::IsRowMajor
|
||||
};
|
||||
typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
|
||||
typedef typename BlockType::StorageIndex StorageIndex;
|
||||
typedef typename _MatrixTypeNested::InnerIterator Base;
|
||||
const BlockType& m_block;
|
||||
Index m_end;
|
||||
public:
|
||||
|
||||
EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer)
|
||||
: Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
|
||||
m_block(block),
|
||||
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
|
||||
{
|
||||
while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
|
||||
Base::operator++();
|
||||
}
|
||||
|
||||
inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
|
||||
inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
|
||||
inline Index row() const { return Base::row() - m_block.m_startRow.value(); }
|
||||
inline Index col() const { return Base::col() - m_block.m_startCol.value(); }
|
||||
|
||||
inline operator bool() const { return Base::operator bool() && Base::index() < m_end; }
|
||||
};
|
||||
|
||||
// Row vector of a column-major sparse matrix or column of a row-major one.
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,true>
|
||||
{
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
enum {
|
||||
IsRowMajor = BlockType::IsRowMajor
|
||||
};
|
||||
typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
|
||||
typedef typename BlockType::StorageIndex StorageIndex;
|
||||
typedef typename BlockType::Scalar Scalar;
|
||||
const BlockType& m_block;
|
||||
Index m_outerPos;
|
||||
Index m_innerIndex;
|
||||
Scalar m_value;
|
||||
Index m_end;
|
||||
public:
|
||||
|
||||
explicit EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
|
||||
:
|
||||
m_block(block),
|
||||
m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry
|
||||
m_innerIndex(IsRowMajor ? block.m_startRow.value() : block.m_startCol.value()),
|
||||
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(outer);
|
||||
eigen_assert(outer==0);
|
||||
|
||||
++(*this);
|
||||
}
|
||||
|
||||
inline Index index() const { return m_outerPos - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
|
||||
inline Index outer() const { return 0; }
|
||||
inline Index row() const { return IsRowMajor ? 0 : index(); }
|
||||
inline Index col() const { return IsRowMajor ? index() : 0; }
|
||||
|
||||
inline Scalar value() const { return m_value; }
|
||||
|
||||
inline GenericSparseBlockInnerIteratorImpl& operator++()
|
||||
{
|
||||
// search next non-zero entry
|
||||
while(++m_outerPos<m_end)
|
||||
{
|
||||
typename XprType::InnerIterator it(m_block.m_matrix, m_outerPos);
|
||||
// search for the key m_innerIndex in the current outer-vector
|
||||
while(it && it.index() < m_innerIndex) ++it;
|
||||
if(it && it.index()==m_innerIndex)
|
||||
{
|
||||
m_value = it.value();
|
||||
break;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline operator bool() const { return m_outerPos < m_end; }
|
||||
};
|
||||
|
||||
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBased >
|
||||
@ -523,9 +442,16 @@ struct unary_evaluator<Block<ArgType,BlockRows,BlockCols,InnerPanel>, IteratorBa
|
||||
explicit unary_evaluator(const XprType& op)
|
||||
: m_argImpl(op.nestedExpression()), m_block(op)
|
||||
{}
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
Index nnz = m_block.nonZeros();
|
||||
if(nnz<0)
|
||||
return m_argImpl.nonZerosEstimate() * m_block.size() / m_block.nestedExpression().size();
|
||||
return nnz;
|
||||
}
|
||||
|
||||
protected:
|
||||
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
|
||||
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
|
||||
|
||||
typename evaluator<ArgType>::nestedType m_argImpl;
|
||||
const XprType &m_block;
|
||||
@ -570,6 +496,7 @@ public:
|
||||
: m_eval(aEval),
|
||||
m_outerPos( (IsRowMajor ? aEval.m_block.startCol() : aEval.m_block.startRow()) - 1), // -1 so that operator++ finds the first non-zero entry
|
||||
m_innerIndex(IsRowMajor ? aEval.m_block.startRow() : aEval.m_block.startCol()),
|
||||
m_value(0),
|
||||
m_end(IsRowMajor ? aEval.m_block.startCol()+aEval.m_block.blockCols() : aEval.m_block.startRow()+aEval.m_block.blockRows())
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(outer);
|
||||
|
@ -35,6 +35,25 @@ class SparseCompressedBase
|
||||
class InnerIterator;
|
||||
class ReverseInnerIterator;
|
||||
|
||||
protected:
|
||||
typedef typename Base::IndexVector IndexVector;
|
||||
Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); }
|
||||
const Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(innerNonZeroPtr(), isCompressed()?0:derived().outerSize()); }
|
||||
|
||||
public:
|
||||
|
||||
/** \returns the number of non zero coefficients */
|
||||
inline Index nonZeros() const
|
||||
{
|
||||
if(isCompressed())
|
||||
return outerIndexPtr()[derived().outerSize()]-outerIndexPtr()[0];
|
||||
else if(derived().outerSize()==0)
|
||||
return 0;
|
||||
else
|
||||
return innerNonZeros().sum();
|
||||
|
||||
}
|
||||
|
||||
/** \returns a const pointer to the array of values.
|
||||
* This function is aimed at interoperability with other libraries.
|
||||
* \sa innerIndexPtr(), outerIndexPtr() */
|
||||
@ -165,6 +184,10 @@ struct evaluator<SparseCompressedBase<Derived> >
|
||||
evaluator() : m_matrix(0) {}
|
||||
explicit evaluator(const Derived &mat) : m_matrix(&mat) {}
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return m_matrix->nonZeros();
|
||||
}
|
||||
|
||||
operator Derived&() { return m_matrix->const_cast_derived(); }
|
||||
operator const Derived&() const { return *m_matrix; }
|
||||
|
||||
|
@ -121,6 +121,10 @@ public:
|
||||
m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs())
|
||||
{ }
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate();
|
||||
}
|
||||
|
||||
protected:
|
||||
const BinaryOp m_functor;
|
||||
@ -198,6 +202,10 @@ public:
|
||||
m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs())
|
||||
{ }
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate());
|
||||
}
|
||||
|
||||
protected:
|
||||
const BinaryOp m_functor;
|
||||
@ -243,7 +251,7 @@ public:
|
||||
EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); }
|
||||
|
||||
EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; }
|
||||
|
||||
|
||||
protected:
|
||||
const LhsEvaluator &m_lhsEval;
|
||||
RhsIterator m_rhsIter;
|
||||
@ -262,6 +270,10 @@ public:
|
||||
m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs())
|
||||
{ }
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return m_rhsImpl.nonZerosEstimate();
|
||||
}
|
||||
|
||||
protected:
|
||||
const BinaryOp m_functor;
|
||||
@ -308,7 +320,7 @@ public:
|
||||
EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); }
|
||||
|
||||
EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; }
|
||||
|
||||
|
||||
protected:
|
||||
LhsIterator m_lhsIter;
|
||||
const RhsEvaluator &m_rhsEval;
|
||||
@ -327,6 +339,10 @@ public:
|
||||
m_lhsImpl(xpr.lhs()),
|
||||
m_rhsImpl(xpr.rhs())
|
||||
{ }
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return m_lhsImpl.nonZerosEstimate();
|
||||
}
|
||||
|
||||
protected:
|
||||
const BinaryOp m_functor;
|
||||
|
@ -30,6 +30,10 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp,ArgType>, IteratorBased>
|
||||
};
|
||||
|
||||
explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {}
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return m_argImpl.nonZerosEstimate();
|
||||
}
|
||||
|
||||
protected:
|
||||
typedef typename evaluator<ArgType>::InnerIterator EvalIterator;
|
||||
|
@ -105,9 +105,6 @@ class SparseMapBase<Derived,ReadOnlyAccessors>
|
||||
return ((*r==inner) && (id<end)) ? m_values[id] : Scalar(0);
|
||||
}
|
||||
|
||||
/** \returns the number of non zero coefficients */
|
||||
inline Index nonZeros() const { return m_nnz; }
|
||||
|
||||
inline SparseMapBase(Index rows, Index cols, Index nnz, IndexPointer outerIndexPtr, IndexPointer innerIndexPtr,
|
||||
ScalarPointer valuePtr, IndexPointer innerNonZerosPtr = 0)
|
||||
: m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_nnz(nnz), m_outerIndex(outerIndexPtr),
|
||||
|
@ -95,6 +95,7 @@ class SparseMatrix
|
||||
public:
|
||||
typedef SparseCompressedBase<SparseMatrix> Base;
|
||||
using Base::isCompressed;
|
||||
using Base::nonZeros;
|
||||
_EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix)
|
||||
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=)
|
||||
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=)
|
||||
@ -122,9 +123,6 @@ class SparseMatrix
|
||||
StorageIndex* m_outerIndex;
|
||||
StorageIndex* m_innerNonZeros; // optional, if null then the data is compressed
|
||||
Storage m_data;
|
||||
|
||||
Eigen::Map<IndexVector> innerNonZeros() { return Eigen::Map<IndexVector>(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); }
|
||||
const Eigen::Map<const IndexVector> innerNonZeros() const { return Eigen::Map<const IndexVector>(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); }
|
||||
|
||||
public:
|
||||
|
||||
@ -252,14 +250,6 @@ class SparseMatrix
|
||||
memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(StorageIndex));
|
||||
}
|
||||
|
||||
/** \returns the number of non zero coefficients */
|
||||
inline Index nonZeros() const
|
||||
{
|
||||
if(m_innerNonZeros)
|
||||
return innerNonZeros().sum();
|
||||
return convert_index(Index(m_data.size()));
|
||||
}
|
||||
|
||||
/** Preallocates \a reserveSize non zeros.
|
||||
*
|
||||
* Precondition: the matrix must be in compressed mode. */
|
||||
@ -1172,8 +1162,12 @@ typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Op
|
||||
return (m_data.value(p) = 0);
|
||||
}
|
||||
|
||||
// make sure the matrix is compatible to random un-compressed insertion:
|
||||
m_data.resize(m_data.allocatedSize());
|
||||
if(m_data.size() != m_data.allocatedSize())
|
||||
{
|
||||
// make sure the matrix is compatible to random un-compressed insertion:
|
||||
m_data.resize(m_data.allocatedSize());
|
||||
this->reserveInnerVectors(Array<StorageIndex,Dynamic,1>::Constant(2*m_outerSize, convert_index(m_outerSize)));
|
||||
}
|
||||
|
||||
return insertUncompressed(row,col);
|
||||
}
|
||||
|
@ -149,9 +149,6 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
|
||||
/** \returns the number of coefficients, which is \a rows()*cols().
|
||||
* \sa rows(), cols(). */
|
||||
inline Index size() const { return rows() * cols(); }
|
||||
/** \returns the number of nonzero coefficients which is in practice the number
|
||||
* of stored coefficients. */
|
||||
inline Index nonZeros() const { return derived().nonZeros(); }
|
||||
/** \returns true if either the number of rows or the number of columns is equal to 1.
|
||||
* In other words, this function returns
|
||||
* \code rows()==1 || cols()==1 \endcode
|
||||
|
@ -33,14 +33,6 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
|
||||
// allocate a temporary buffer
|
||||
AmbiVector<Scalar,StorageIndex> tempVector(rows);
|
||||
|
||||
// estimate the number of non zero entries
|
||||
// given a rhs column containing Y non zeros, we assume that the respective Y columns
|
||||
// of the lhs differs in average of one non zeros, thus the number of non zeros for
|
||||
// the product of a rhs column with the lhs is X+Y where X is the average number of non zero
|
||||
// per column of the lhs.
|
||||
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
|
||||
Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
|
||||
|
||||
// mimics a resizeByInnerOuter:
|
||||
if(ResultType::IsRowMajor)
|
||||
res.resize(cols, rows);
|
||||
@ -49,6 +41,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
|
||||
|
||||
typename evaluator<Lhs>::type lhsEval(lhs);
|
||||
typename evaluator<Rhs>::type rhsEval(rhs);
|
||||
|
||||
  // estimate the number of non zero entries
  // given a rhs column containing Y non zeros, we assume that the respective Y columns
  // of the lhs differ on average by one non zero, thus the number of non zeros for
  // the product of a rhs column with the lhs is X+Y where X is the average number of non zeros
  // per column of the lhs.
  // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();
|
||||
|
||||
res.reserve(estimated_nnz_prod);
|
||||
double ratioColRes = double(estimated_nnz_prod)/double(lhs.rows()*rhs.cols());
|
||||
|
@ -40,15 +40,11 @@ namespace internal {
|
||||
};
|
||||
}
|
||||
|
||||
// Implement nonZeros() for transpose. I'm not sure that's the best approach for that.
|
||||
// Perhaps it should be implemented in Transpose<> itself.
|
||||
template<typename MatrixType> class TransposeImpl<MatrixType,Sparse>
|
||||
: public internal::SparseTransposeImpl<MatrixType>
|
||||
{
|
||||
protected:
|
||||
typedef internal::SparseTransposeImpl<MatrixType> Base;
|
||||
public:
|
||||
inline Index nonZeros() const { return Base::derived().nestedExpression().nonZeros(); }
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
@ -61,6 +57,10 @@ struct unary_evaluator<Transpose<ArgType>, IteratorBased>
|
||||
typedef typename evaluator<ArgType>::ReverseInnerIterator EvalReverseIterator;
|
||||
public:
|
||||
typedef Transpose<ArgType> XprType;
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return m_argImpl.nonZerosEstimate();
|
||||
}
|
||||
|
||||
class InnerIterator : public EvalIterator
|
||||
{
|
||||
|
@ -50,13 +50,6 @@ protected:
|
||||
|
||||
template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const;
|
||||
template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const;
|
||||
|
||||
inline Index nonZeros() const {
|
||||
// FIXME HACK number of nonZeros is required for product logic
|
||||
// this returns only an upper bound (but should be OK for most purposes)
|
||||
return derived().nestedExpression().nonZeros();
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
@ -191,6 +184,10 @@ public:
|
||||
|
||||
explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {}
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return m_argImpl.nonZerosEstimate();
|
||||
}
|
||||
|
||||
class InnerIterator : public EvalIterator
|
||||
{
|
||||
typedef EvalIterator Base;
|
||||
|
@ -442,6 +442,10 @@ struct evaluator<SparseVector<_Scalar,_Options,_Index> >
|
||||
|
||||
explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {}
|
||||
|
||||
inline Index nonZerosEstimate() const {
|
||||
return m_matrix.nonZeros();
|
||||
}
|
||||
|
||||
operator SparseVectorType&() { return m_matrix.const_cast_derived(); }
|
||||
operator const SparseVectorType&() const { return m_matrix; }
|
||||
|
||||
|
@ -165,8 +165,9 @@ struct SluMatrix : SuperMatrix
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
static SluMatrix Map(SparseMatrixBase<MatrixType>& mat)
|
||||
static SluMatrix Map(SparseMatrixBase<MatrixType>& a_mat)
|
||||
{
|
||||
MatrixType &mat(a_mat.derived());
|
||||
SluMatrix res;
|
||||
if ((MatrixType::Flags&RowMajorBit)==RowMajorBit)
|
||||
{
|
||||
@ -184,9 +185,9 @@ struct SluMatrix : SuperMatrix
|
||||
res.Mtype = SLU_GE;
|
||||
|
||||
res.storage.nnz = internal::convert_index<int>(mat.nonZeros());
|
||||
res.storage.values = mat.derived().valuePtr();
|
||||
res.storage.innerInd = mat.derived().innerIndexPtr();
|
||||
res.storage.outerInd = mat.derived().outerIndexPtr();
|
||||
res.storage.values = mat.valuePtr();
|
||||
res.storage.innerInd = mat.innerIndexPtr();
|
||||
res.storage.outerInd = mat.outerIndexPtr();
|
||||
|
||||
res.setScalarType<typename MatrixType::Scalar>();
|
||||
|
||||
@ -302,6 +303,7 @@ class SuperLUBase : public SparseSolverBase<Derived>
|
||||
typedef Matrix<Scalar,Dynamic,1> Vector;
|
||||
typedef Matrix<int, 1, MatrixType::ColsAtCompileTime> IntRowVectorType;
|
||||
typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
|
||||
typedef Map<PermutationMatrix<Dynamic,Dynamic,int> > PermutationMap;
|
||||
typedef SparseMatrix<Scalar> LUMatrixType;
|
||||
|
||||
public:
|
||||
@ -459,10 +461,11 @@ class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> >
|
||||
typedef typename Base::RealScalar RealScalar;
|
||||
typedef typename Base::StorageIndex StorageIndex;
|
||||
typedef typename Base::IntRowVectorType IntRowVectorType;
|
||||
typedef typename Base::IntColVectorType IntColVectorType;
|
||||
typedef typename Base::IntColVectorType IntColVectorType;
|
||||
typedef typename Base::PermutationMap PermutationMap;
|
||||
typedef typename Base::LUMatrixType LUMatrixType;
|
||||
typedef TriangularView<LUMatrixType, Lower|UnitDiag> LMatrixType;
|
||||
typedef TriangularView<LUMatrixType, Upper> UMatrixType;
|
||||
typedef TriangularView<LUMatrixType, Upper> UMatrixType;
|
||||
|
||||
public:
|
||||
using Base::_solve_impl;
|
||||
@ -774,6 +777,8 @@ typename SuperLU<MatrixType>::Scalar SuperLU<MatrixType>::determinant() const
|
||||
det *= m_u.valuePtr()[lastId];
|
||||
}
|
||||
}
|
||||
if(PermutationMap(m_p.data(),m_p.size()).determinant()*PermutationMap(m_q.data(),m_q.size()).determinant()<0)
|
||||
det = -det;
|
||||
if(m_sluEqued!='N')
|
||||
return det/m_sluRscale.prod()/m_sluCscale.prod();
|
||||
else
|
||||
|
@ -25,6 +25,12 @@ using namespace std;
|
||||
|
||||
const int default_precision = 4;
|
||||
|
||||
// see --only-cubic-sizes
|
||||
bool only_cubic_sizes = false;
|
||||
|
||||
// see --dump-tables
|
||||
bool dump_tables = false;
|
||||
|
||||
uint8_t log2_pot(size_t x) {
|
||||
size_t l = 0;
|
||||
while (x >>= 1) l++;
|
||||
@ -130,6 +136,9 @@ struct inputfile_t
|
||||
cerr << "offending line:" << endl << line << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
|
||||
continue;
|
||||
}
|
||||
inputfile_entry_t entry;
|
||||
entry.product_size = uint16_t(product_size);
|
||||
entry.pot_block_size = uint16_t(block_size);
|
||||
@ -155,6 +164,9 @@ struct inputfile_t
|
||||
cerr << "offending line:" << endl << line << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
|
||||
continue;
|
||||
}
|
||||
inputfile_entry_t entry;
|
||||
entry.product_size = uint16_t(product_size);
|
||||
entry.pot_block_size = 0;
|
||||
@ -309,14 +321,82 @@ float efficiency_of_subset(
|
||||
efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry);
|
||||
}
|
||||
efficiency = min(efficiency, efficiency_this_product_size);
|
||||
first_entry_index_with_this_product_size = entry_index;
|
||||
product_size = first_file.entries[entry_index].product_size;
|
||||
if (entry_index < num_entries) {
|
||||
first_entry_index_with_this_product_size = entry_index;
|
||||
product_size = first_file.entries[entry_index].product_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return efficiency;
|
||||
}
|
||||
|
||||
void dump_table_for_subset(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<size_t>& subset)
|
||||
{
|
||||
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
|
||||
const size_t num_entries = first_file.entries.size();
|
||||
size_t entry_index = 0;
|
||||
size_t first_entry_index_with_this_product_size = 0;
|
||||
uint16_t product_size = first_file.entries[0].product_size;
|
||||
size_t i = 0;
|
||||
size_triple_t min_product_size(first_file.entries.front().product_size);
|
||||
size_triple_t max_product_size(first_file.entries.back().product_size);
|
||||
if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) {
|
||||
abort();
|
||||
}
|
||||
if (only_cubic_sizes) {
|
||||
cerr << "Can't generate tables with --only-cubic-sizes." << endl;
|
||||
abort();
|
||||
}
|
||||
cout << "struct LookupTable {" << endl;
|
||||
cout << " static const size_t BaseSize = " << min_product_size.k << ";" << endl;
|
||||
const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
|
||||
const size_t TableSize = NumSizes * NumSizes * NumSizes;
|
||||
cout << " static const size_t NumSizes = " << NumSizes << ";" << endl;
|
||||
cout << " static const unsigned short* Data() {" << endl;
|
||||
cout << " static const unsigned short data[" << TableSize << "] = {";
|
||||
while (entry_index < num_entries) {
|
||||
++entry_index;
|
||||
if (entry_index == num_entries ||
|
||||
first_file.entries[entry_index].product_size != product_size)
|
||||
{
|
||||
float best_efficiency_this_product_size = 0.0f;
|
||||
uint16_t best_block_size_this_product_size = 0;
|
||||
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
|
||||
float efficiency_this_entry = 1.0f;
|
||||
for (auto i = subset.begin(); i != subset.end(); ++i) {
|
||||
efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
|
||||
}
|
||||
if (efficiency_this_entry > best_efficiency_this_product_size) {
|
||||
best_efficiency_this_product_size = efficiency_this_entry;
|
||||
best_block_size_this_product_size = first_file.entries[e].block_size;
|
||||
}
|
||||
}
|
||||
if ((i++) % NumSizes) {
|
||||
cout << " ";
|
||||
} else {
|
||||
cout << endl << " ";
|
||||
}
|
||||
cout << "0x" << hex << best_block_size_this_product_size << dec;
|
||||
if (entry_index < num_entries) {
|
||||
cout << ",";
|
||||
first_entry_index_with_this_product_size = entry_index;
|
||||
product_size = first_file.entries[entry_index].product_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i != TableSize) {
|
||||
cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl;
|
||||
abort();
|
||||
}
|
||||
cout << endl << " };" << endl;
|
||||
cout << " return data;" << endl;
|
||||
cout << " }" << endl;
|
||||
cout << "};" << endl;
|
||||
}
|
||||
|
||||
float efficiency_of_partition(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<vector<size_t>>& partition)
|
||||
@ -498,6 +578,10 @@ void print_partition(
|
||||
for (auto file = subset->begin(); file != subset->end(); ++file) {
|
||||
cout << " " << preprocessed_inputfiles[*file].filename << endl;
|
||||
}
|
||||
if (dump_tables) {
|
||||
cout << " Table:" << endl;
|
||||
dump_table_for_subset(preprocessed_inputfiles, *subset);
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
@ -505,28 +589,23 @@ void print_partition(
|
||||
struct action_t
|
||||
{
|
||||
virtual const char* invokation_name() const { abort(); return nullptr; }
|
||||
virtual void run(int, char*[]) const { abort(); }
|
||||
virtual void run(const vector<string>&) const { abort(); }
|
||||
virtual ~action_t() {}
|
||||
};
|
||||
|
||||
struct partition_action_t : action_t
|
||||
{
|
||||
virtual const char* invokation_name() const { return "partition"; }
|
||||
virtual void run(int argc, char *argv[]) const
|
||||
virtual const char* invokation_name() const override { return "partition"; }
|
||||
virtual void run(const vector<string>& input_filenames) const override
|
||||
{
|
||||
vector<preprocessed_inputfile_t> preprocessed_inputfiles;
|
||||
|
||||
if (!argc) {
|
||||
if (input_filenames.empty()) {
|
||||
cerr << "The " << invokation_name() << " action needs a list of input files." << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
vector<string> inputfilenames;
|
||||
for (int i = 0; i < argc; i++) {
|
||||
inputfilenames.emplace_back(argv[i]);
|
||||
}
|
||||
|
||||
for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) {
|
||||
for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) {
|
||||
inputfile_t inputfile(*it);
|
||||
switch (inputfile.type) {
|
||||
case inputfile_t::type_t::all_pot_sizes:
|
||||
@ -610,7 +689,7 @@ struct evaluate_defaults_action_t : action_t
|
||||
static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
|
||||
return e1.default_efficiency < e2.default_efficiency;
|
||||
}
|
||||
virtual const char* invokation_name() const { return "evaluate-defaults"; }
|
||||
virtual const char* invokation_name() const override { return "evaluate-defaults"; }
|
||||
void show_usage_and_exit() const
|
||||
{
|
||||
cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
|
||||
@ -618,13 +697,13 @@ struct evaluate_defaults_action_t : action_t
|
||||
<< "performance measured over all POT sizes." << endl;
|
||||
exit(1);
|
||||
}
|
||||
virtual void run(int argc, char *argv[]) const
|
||||
virtual void run(const vector<string>& input_filenames) const override
|
||||
{
|
||||
if (argc != 2) {
|
||||
if (input_filenames.size() != 2) {
|
||||
show_usage_and_exit();
|
||||
}
|
||||
inputfile_t inputfile_default_sizes(argv[0]);
|
||||
inputfile_t inputfile_all_pot_sizes(argv[1]);
|
||||
inputfile_t inputfile_default_sizes(input_filenames[0]);
|
||||
inputfile_t inputfile_all_pot_sizes(input_filenames[1]);
|
||||
if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) {
|
||||
cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl;
|
||||
show_usage_and_exit();
|
||||
@ -719,7 +798,7 @@ struct evaluate_defaults_action_t : action_t
|
||||
void show_usage_and_exit(int argc, char* argv[],
|
||||
const vector<unique_ptr<action_t>>& available_actions)
|
||||
{
|
||||
cerr << "usage: " << argv[0] << " <action> <input files...>" << endl;
|
||||
cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
|
||||
cerr << "available actions:" << endl;
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
cerr << " " << (*it)->invokation_name() << endl;
|
||||
@ -737,21 +816,61 @@ int main(int argc, char* argv[])
|
||||
available_actions.emplace_back(new partition_action_t);
|
||||
available_actions.emplace_back(new evaluate_defaults_action_t);
|
||||
|
||||
auto action = available_actions.end();
|
||||
vector<string> input_filenames;
|
||||
|
||||
action_t* action = nullptr;
|
||||
|
||||
if (argc < 2) {
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
if (!strcmp(argv[1], (*it)->invokation_name())) {
|
||||
action = it;
|
||||
break;
|
||||
for (int i = 1; i < argc; i++) {
|
||||
bool arg_handled = false;
|
||||
// Step 1. Try to match action invokation names.
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
if (!strcmp(argv[i], (*it)->invokation_name())) {
|
||||
if (!action) {
|
||||
action = it->get();
|
||||
arg_handled = true;
|
||||
break;
|
||||
} else {
|
||||
cerr << "can't specify more than one action!" << endl;
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (arg_handled) {
|
||||
continue;
|
||||
}
|
||||
// Step 2. Try to match option names.
|
||||
if (argv[i][0] == '-') {
|
||||
if (!strcmp(argv[i], "--only-cubic-sizes")) {
|
||||
only_cubic_sizes = true;
|
||||
arg_handled = true;
|
||||
}
|
||||
if (!strcmp(argv[i], "--dump-tables")) {
|
||||
dump_tables = true;
|
||||
arg_handled = true;
|
||||
}
|
||||
if (!arg_handled) {
|
||||
cerr << "Unrecognized option: " << argv[i] << endl;
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
}
|
||||
if (arg_handled) {
|
||||
continue;
|
||||
}
|
||||
// Step 3. Default to interpreting args as input filenames.
|
||||
input_filenames.emplace_back(argv[i]);
|
||||
}
|
||||
|
||||
if (action == available_actions.end()) {
|
||||
if (dump_tables && only_cubic_sizes) {
|
||||
cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl;
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
|
||||
(*action)->run(argc - 2, argv + 2);
|
||||
if (!action) {
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
|
||||
action->run(input_filenames);
|
||||
}
|
||||
|
@ -446,7 +446,7 @@ void try_run_some_benchmarks(
|
||||
unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
|
||||
|
||||
while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
|
||||
if (seconds_to_sleep_if_lower_clock_speed > 30) {
|
||||
if (seconds_to_sleep_if_lower_clock_speed > 32) {
|
||||
cerr << "Sleeping longer probably won't make a difference." << endl;
|
||||
cerr << "Serializing benchmarks to " << session_filename << endl;
|
||||
serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
|
||||
@ -456,7 +456,7 @@ void try_run_some_benchmarks(
|
||||
rerun_last_tests = true;
|
||||
cerr << "Sleeping "
|
||||
<< seconds_to_sleep_if_lower_clock_speed
|
||||
<< " s..." << endl;
|
||||
<< " s... \r" << endl;
|
||||
sleep(seconds_to_sleep_if_lower_clock_speed);
|
||||
current_clock_speed = measure_clock_speed();
|
||||
seconds_to_sleep_if_lower_clock_speed *= 2;
|
||||
|
@ -41,3 +41,5 @@ before-evaluators
|
||||
6981:7e5d6f78da59 # dynamic loop swapping
|
||||
6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache
|
||||
6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1.
|
||||
7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5)
|
||||
|
||||
|
@ -26,7 +26,7 @@ macro(_metis_check_version)
|
||||
string(REGEX MATCH "define[ \t]+METIS_VER_SUBMINOR[ \t]+([0-9]+)" _metis_subminor_version_match "${_metis_version_header}")
|
||||
set(METIS_SUBMINOR_VERSION "${CMAKE_MATCH_1}")
|
||||
if(NOT METIS_MAJOR_VERSION)
|
||||
message(WARNING "Could not determine Metis version. Assuming version 4.0.0")
|
||||
message(STATUS "Could not determine Metis version. Assuming version 4.0.0")
|
||||
set(METIS_VERSION 4.0.0)
|
||||
else()
|
||||
set(METIS_VERSION ${METIS_MAJOR_VERSION}.${METIS_MINOR_VERSION}.${METIS_SUBMINOR_VERSION})
|
||||
|
@ -10,9 +10,10 @@ if(QT4_FOUND)
|
||||
target_link_libraries(Tutorial_sparse_example ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${QT_QTCORE_LIBRARY} ${QT_QTGUI_LIBRARY})
|
||||
|
||||
add_custom_command(
|
||||
TARGET Tutorial_sparse_example
|
||||
POST_BUILD
|
||||
COMMAND Tutorial_sparse_example ARGS ${CMAKE_CURRENT_BINARY_DIR}/../html/Tutorial_sparse_example.jpeg
|
||||
TARGET Tutorial_sparse_example
|
||||
POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/../html/
|
||||
COMMAND Tutorial_sparse_example ARGS ${CMAKE_CURRENT_BINARY_DIR}/../html/Tutorial_sparse_example.jpeg
|
||||
)
|
||||
|
||||
add_dependencies(all_examples Tutorial_sparse_example)
|
||||
|
@ -47,6 +47,18 @@ ei_add_failtest("sparse_ref_3")
|
||||
ei_add_failtest("sparse_ref_4")
|
||||
ei_add_failtest("sparse_ref_5")
|
||||
|
||||
ei_add_failtest("partialpivlu_int")
|
||||
ei_add_failtest("fullpivlu_int")
|
||||
ei_add_failtest("llt_int")
|
||||
ei_add_failtest("ldlt_int")
|
||||
ei_add_failtest("qr_int")
|
||||
ei_add_failtest("colpivqr_int")
|
||||
ei_add_failtest("fullpivqr_int")
|
||||
ei_add_failtest("jacobisvd_int")
|
||||
ei_add_failtest("bdcsvd_int")
|
||||
ei_add_failtest("eigensolver_int")
|
||||
ei_add_failtest("eigensolver_cplx")
|
||||
|
||||
if (EIGEN_FAILTEST_FAILURE_COUNT)
|
||||
message(FATAL_ERROR
|
||||
"${EIGEN_FAILTEST_FAILURE_COUNT} out of ${EIGEN_FAILTEST_COUNT} failtests FAILED. "
|
||||
|
14
failtest/bdcsvd_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/SVD"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  BDCSVD<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/colpivqr_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/QR"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  ColPivHouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/eigensolver_cplx.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/Eigenvalues"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR std::complex<double>
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  EigenSolver<Matrix<SCALAR,Dynamic,Dynamic> > eig(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/eigensolver_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/Eigenvalues"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  EigenSolver<Matrix<SCALAR,Dynamic,Dynamic> > eig(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/fullpivlu_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/LU"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  FullPivLU<Matrix<SCALAR,Dynamic,Dynamic> > lu(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/fullpivqr_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/QR"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  FullPivHouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/jacobisvd_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/SVD"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  JacobiSVD<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/ldlt_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/Cholesky"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  LDLT<Matrix<SCALAR,Dynamic,Dynamic> > ldlt(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/llt_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/Cholesky"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  LLT<Matrix<SCALAR,Dynamic,Dynamic> > llt(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/partialpivlu_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/LU"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  PartialPivLU<Matrix<SCALAR,Dynamic,Dynamic> > lu(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

14
failtest/qr_int.cpp
Normal file
@ -0,0 +1,14 @@
#include "../Eigen/QR"

#ifdef EIGEN_SHOULD_FAIL_TO_BUILD
#define SCALAR int
#else
#define SCALAR float
#endif

using namespace Eigen;

int main()
{
  HouseholderQR<Matrix<SCALAR,Dynamic,Dynamic> > qr(Matrix<SCALAR,Dynamic,Dynamic>::Random(10,10));
}

@ -139,6 +139,7 @@ endif(TEST_LIB)
|
||||
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
|
||||
add_custom_target(BuildOfficial)
|
||||
|
||||
ei_add_test(rand)
|
||||
ei_add_test(meta)
|
||||
ei_add_test(sizeof)
|
||||
ei_add_test(dynalloc)
|
||||
@ -226,6 +227,7 @@ ei_add_test(stdvector_overload)
|
||||
ei_add_test(stdlist)
|
||||
ei_add_test(stddeque)
|
||||
ei_add_test(sparse_basic)
|
||||
ei_add_test(sparse_block)
|
||||
ei_add_test(sparse_vector)
|
||||
ei_add_test(sparse_product)
|
||||
ei_add_test(sparse_ref)
|
||||
@ -330,3 +332,8 @@ endif(EIGEN_TEST_NVCC)
|
||||
|
||||
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests)
|
||||
add_test(NAME failtests WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests COMMAND ${CMAKE_COMMAND} ${Eigen_SOURCE_DIR} -G "${CMAKE_GENERATOR}" -DEIGEN_FAILTEST=ON)
|
||||
|
||||
option(EIGEN_TEST_BUILD_DOCUMENTATION "Test building the doxygen documentation" OFF)
|
||||
IF(EIGEN_TEST_BUILD_DOCUMENTATION)
|
||||
add_dependencies(buildtests doc)
|
||||
ENDIF()
|
||||
|
@ -24,7 +24,7 @@ template<typename MatrixType> void reverse(const MatrixType& m)
|
||||
|
||||
// this test relies a lot on Random.h, and there's not much more that we can do
|
||||
// to test it, hence I consider that we will have tested Random.h
|
||||
MatrixType m1 = MatrixType::Random(rows, cols);
|
||||
MatrixType m1 = MatrixType::Random(rows, cols), m2;
|
||||
VectorType v1 = VectorType::Random(rows);
|
||||
|
||||
MatrixType m1_r = m1.reverse();
|
||||
@ -96,6 +96,26 @@ template<typename MatrixType> void reverse(const MatrixType& m)
|
||||
|
||||
m1.reverse()(r, c) = x;
|
||||
VERIFY_IS_APPROX(x, m1(rows - 1 - r, cols - 1 - c));
|
||||
|
||||
m2 = m1;
|
||||
m2.reverseInPlace();
|
||||
VERIFY_IS_APPROX(m2,m1.reverse().eval());
|
||||
|
||||
m2 = m1;
|
||||
m2.col(0).reverseInPlace();
|
||||
VERIFY_IS_APPROX(m2.col(0),m1.col(0).reverse().eval());
|
||||
|
||||
m2 = m1;
|
||||
m2.row(0).reverseInPlace();
|
||||
VERIFY_IS_APPROX(m2.row(0),m1.row(0).reverse().eval());
|
||||
|
||||
m2 = m1;
|
||||
m2.rowwise().reverseInPlace();
|
||||
VERIFY_IS_APPROX(m2,m1.rowwise().reverse().eval());
|
||||
|
||||
m2 = m1;
|
||||
m2.colwise().reverseInPlace();
|
||||
VERIFY_IS_APPROX(m2,m1.colwise().reverse().eval());
|
||||
|
||||
/*
|
||||
m1.colwise().reverse()(r, c) = x;
|
||||
@ -113,11 +133,11 @@ void test_array_reverse()
|
||||
CALL_SUBTEST_2( reverse(Matrix2f()) );
|
||||
CALL_SUBTEST_3( reverse(Matrix4f()) );
|
||||
CALL_SUBTEST_4( reverse(Matrix4d()) );
|
||||
CALL_SUBTEST_5( reverse(MatrixXcf(3, 3)) );
|
||||
CALL_SUBTEST_6( reverse(MatrixXi(6, 3)) );
|
||||
CALL_SUBTEST_7( reverse(MatrixXcd(20, 20)) );
|
||||
CALL_SUBTEST_5( reverse(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_6( reverse(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_7( reverse(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_8( reverse(Matrix<float, 100, 100>()) );
|
||||
CALL_SUBTEST_9( reverse(Matrix<float,Dynamic,Dynamic,RowMajor>(6,3)) );
|
||||
CALL_SUBTEST_9( reverse(Matrix<float,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
}
|
||||
#ifdef EIGEN_TEST_PART_3
|
||||
Vector4f x; x << 1, 2, 3, 4;
|
||||
|
@ -17,6 +17,7 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m)
|
||||
typedef Matrix<Scalar, Rows, 1> VectorType;
|
||||
typedef Matrix<Scalar, 1, Cols> RowVectorType;
|
||||
typedef Matrix<Scalar, Rows, Rows> SquareMatrixType;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> DynMatrixType;
|
||||
typedef DiagonalMatrix<Scalar, Rows> LeftDiagonalMatrix;
|
||||
typedef DiagonalMatrix<Scalar, Cols> RightDiagonalMatrix;
|
||||
typedef Matrix<Scalar, Rows==Dynamic?Dynamic:2*Rows, Cols==Dynamic?Dynamic:2*Cols> BigMatrix;
|
||||
@ -64,6 +65,13 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m)
|
||||
VERIFY_IS_APPROX( (((v1+v2).asDiagonal() * (m1+m2))(i,j)) , (v1+v2)(i) * (m1+m2)(i,j) );
|
||||
VERIFY_IS_APPROX( ((m1 * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * m1(i,j) );
|
||||
VERIFY_IS_APPROX( (((m1+m2) * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * (m1+m2)(i,j) );
|
||||
|
||||
if(rows>1)
|
||||
{
|
||||
DynMatrixType tmp = m1.topRows(rows/2), res;
|
||||
VERIFY_IS_APPROX( (res = m1.topRows(rows/2) * rv1.asDiagonal()), tmp * rv1.asDiagonal() );
|
||||
VERIFY_IS_APPROX( (res = v1.head(rows/2).asDiagonal()*m1.topRows(rows/2)), v1.head(rows/2).asDiagonal()*tmp );
|
||||
}
|
||||
|
||||
BigMatrix big;
|
||||
big.setZero(2*rows, 2*cols);
|
||||
@ -93,6 +101,17 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m)
|
||||
VERIFY_IS_APPROX( (sq_m1 = (s1*v1).asDiagonal()), (s1*v1).asDiagonal().toDenseMatrix() );
|
||||
}
|
||||
|
||||
template<int>
|
||||
void bug987()
|
||||
{
|
||||
Matrix3Xd points = Matrix3Xd::Random(3, 3);
|
||||
Vector2d diag = Vector2d::Random();
|
||||
Matrix2Xd tmp1 = points.topRows<2>(), res1, res2;
|
||||
VERIFY_IS_APPROX( res1 = diag.asDiagonal() * points.topRows<2>(), res2 = diag.asDiagonal() * tmp1 );
|
||||
Matrix2d tmp2 = points.topLeftCorner<2,2>();
|
||||
VERIFY_IS_APPROX(( res1 = points.topLeftCorner<2,2>()*diag.asDiagonal()) , res2 = tmp2*diag.asDiagonal() );
|
||||
}
|
||||
|
||||
void test_diagonalmatrices()
|
||||
{
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
@ -106,4 +125,5 @@ void test_diagonalmatrices()
|
||||
CALL_SUBTEST_8( diagonalmatrices(Matrix<double,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_9( diagonalmatrices(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
}
|
||||
CALL_SUBTEST_10( bug987<0>() );
|
||||
}
|
||||
|
@ -95,6 +95,9 @@
|
||||
namespace Eigen
|
||||
{
|
||||
static std::vector<std::string> g_test_stack;
|
||||
// level == 0 <=> abort if test fail
|
||||
// level >= 1 <=> warning message to std::cerr if test fail
|
||||
static int g_test_level = 0;
|
||||
static int g_repeat;
|
||||
static unsigned int g_seed;
|
||||
static bool g_has_set_repeat, g_has_set_seed;
|
||||
@ -229,6 +232,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file,
|
||||
{
|
||||
if (!condition)
|
||||
{
|
||||
if(Eigen::g_test_level>0)
|
||||
std::cerr << "WARNING: ";
|
||||
std::cerr << "Test " << testname << " failed in " << file << " (" << line << ")"
|
||||
<< std::endl << " " << condition_as_string << std::endl;
|
||||
std::cerr << "Stack:\n";
|
||||
@ -236,7 +241,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file,
|
||||
for(int i=test_stack_size-1; i>=0; --i)
|
||||
std::cerr << " - " << Eigen::g_test_stack[i] << "\n";
|
||||
std::cerr << "\n";
|
||||
abort();
|
||||
if(Eigen::g_test_level==0)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -113,6 +113,9 @@ void mat_mat_scalar_scalar_product()
|
||||
template <typename MatrixType>
|
||||
void zero_sized_objects(const MatrixType& m)
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
const int PacketSize = internal::packet_traits<Scalar>::size;
|
||||
const int PacketSize1 = PacketSize>1 ? PacketSize-1 : 1;
|
||||
Index rows = m.rows();
|
||||
Index cols = m.cols();
|
||||
|
||||
@ -132,9 +135,41 @@ void zero_sized_objects(const MatrixType& m)
|
||||
res = b*a;
|
||||
VERIFY(res.rows()==0 && res.cols()==cols);
|
||||
}
|
||||
|
||||
{
|
||||
Matrix<Scalar,PacketSize,0> a;
|
||||
Matrix<Scalar,0,1> b;
|
||||
Matrix<Scalar,PacketSize,1> res;
|
||||
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize,1) );
|
||||
VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize,1) );
|
||||
}
|
||||
|
||||
{
|
||||
Matrix<Scalar,PacketSize1,0> a;
|
||||
Matrix<Scalar,0,1> b;
|
||||
Matrix<Scalar,PacketSize1,1> res;
|
||||
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize1,1) );
|
||||
VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize1,1) );
|
||||
}
|
||||
|
||||
{
|
||||
Matrix<Scalar,PacketSize,Dynamic> a(PacketSize,0);
|
||||
Matrix<Scalar,Dynamic,1> b(0,1);
|
||||
Matrix<Scalar,PacketSize,1> res;
|
||||
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize,1) );
|
||||
VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize,1) );
|
||||
}
|
||||
|
||||
{
|
||||
Matrix<Scalar,PacketSize1,Dynamic> a(PacketSize1,0);
|
||||
Matrix<Scalar,Dynamic,1> b(0,1);
|
||||
Matrix<Scalar,PacketSize1,1> res;
|
||||
VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize1,1) );
|
||||
VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize1,1) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<int>
|
||||
void bug_127()
|
||||
{
|
||||
// Bug 127
|
||||
@ -159,6 +194,7 @@ void bug_127()
|
||||
a*b;
|
||||
}
|
||||
|
||||
template<int>
|
||||
void unaligned_objects()
|
||||
{
|
||||
// Regression test for the bug reported here:
|
||||
@ -188,6 +224,29 @@ void unaligned_objects()
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DONT_INLINE
|
||||
Index test_compute_block_size(Index m, Index n, Index k)
|
||||
{
|
||||
Index mc(m), nc(n), kc(k);
|
||||
internal::computeProductBlockingSizes<T,T>(kc, mc, nc);
|
||||
return kc+mc+nc;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Index compute_block_size()
|
||||
{
|
||||
Index ret = 0;
|
||||
ret += test_compute_block_size<T>(0,1,1);
|
||||
ret += test_compute_block_size<T>(1,0,1);
|
||||
ret += test_compute_block_size<T>(1,1,0);
|
||||
ret += test_compute_block_size<T>(0,0,1);
|
||||
ret += test_compute_block_size<T>(0,1,0);
|
||||
ret += test_compute_block_size<T>(1,0,0);
|
||||
ret += test_compute_block_size<T>(0,0,0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void test_product_extra()
|
||||
{
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
@ -198,6 +257,9 @@ void test_product_extra()
|
||||
CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
|
||||
CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
}
|
||||
CALL_SUBTEST_5( bug_127() );
|
||||
CALL_SUBTEST_6( unaligned_objects() );
|
||||
CALL_SUBTEST_5( bug_127<0>() );
|
||||
CALL_SUBTEST_6( unaligned_objects<0>() );
|
||||
CALL_SUBTEST_7( compute_block_size<float>() );
|
||||
CALL_SUBTEST_7( compute_block_size<double>() );
|
||||
CALL_SUBTEST_7( compute_block_size<std::complex<double> >() );
|
||||
}
|
||||
|
88
test/rand.cpp
Normal file
@ -0,0 +1,88 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include "main.h"
|
||||
|
||||
template<typename Scalar> Scalar check_in_range(Scalar x, Scalar y)
|
||||
{
|
||||
Scalar r = internal::random<Scalar>(x,y);
|
||||
VERIFY(r>=x);
|
||||
if(y>=x)
|
||||
{
|
||||
VERIFY(r<=y);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
template<typename Scalar> void check_all_in_range(Scalar x, Scalar y)
|
||||
{
|
||||
Array<int,1,Dynamic> mask(y-x+1);
|
||||
mask.fill(0);
|
||||
long n = (y-x+1)*32;
|
||||
for(long k=0; k<n; ++k)
|
||||
{
|
||||
mask( check_in_range(x,y)-x )++;
|
||||
}
|
||||
VERIFY( (mask>0).all() );
|
||||
}
|
||||
|
||||
void test_rand()
|
||||
{
|
||||
long long_ref = NumTraits<long>::highest()/10;
|
||||
char char_offset = (std::min)(g_repeat,64);
|
||||
char short_offset = (std::min)(g_repeat,16000);
|
||||
|
||||
for(int i = 0; i < g_repeat*10; i++) {
|
||||
CALL_SUBTEST(check_in_range<float>(10,11));
|
||||
CALL_SUBTEST(check_in_range<float>(1.24234523,1.24234523));
|
||||
CALL_SUBTEST(check_in_range<float>(-1,1));
|
||||
CALL_SUBTEST(check_in_range<float>(-1432.2352,-1432.2352));
|
||||
|
||||
CALL_SUBTEST(check_in_range<double>(10,11));
|
||||
CALL_SUBTEST(check_in_range<double>(1.24234523,1.24234523));
|
||||
CALL_SUBTEST(check_in_range<double>(-1,1));
|
||||
CALL_SUBTEST(check_in_range<double>(-1432.2352,-1432.2352));
|
||||
|
||||
CALL_SUBTEST(check_in_range<int>(0,-1));
|
||||
CALL_SUBTEST(check_in_range<short>(0,-1));
|
||||
CALL_SUBTEST(check_in_range<long>(0,-1));
|
||||
CALL_SUBTEST(check_in_range<int>(-673456,673456));
|
||||
CALL_SUBTEST(check_in_range<short>(-24345,24345));
|
||||
CALL_SUBTEST(check_in_range<long>(-long_ref,long_ref));
|
||||
}
|
||||
|
||||
CALL_SUBTEST(check_all_in_range<char>(11,11));
|
||||
CALL_SUBTEST(check_all_in_range<char>(11,11+char_offset));
|
||||
CALL_SUBTEST(check_all_in_range<char>(-5,5));
|
||||
CALL_SUBTEST(check_all_in_range<char>(-11-char_offset,-11));
|
||||
CALL_SUBTEST(check_all_in_range<char>(-126,-126+char_offset));
|
||||
CALL_SUBTEST(check_all_in_range<char>(126-char_offset,126));
|
||||
CALL_SUBTEST(check_all_in_range<char>(-126,126));
|
||||
|
||||
CALL_SUBTEST(check_all_in_range<short>(11,11));
|
||||
CALL_SUBTEST(check_all_in_range<short>(11,11+short_offset));
|
||||
CALL_SUBTEST(check_all_in_range<short>(-5,5));
|
||||
CALL_SUBTEST(check_all_in_range<short>(-11-short_offset,-11));
|
||||
CALL_SUBTEST(check_all_in_range<short>(-24345,-24345+short_offset));
|
||||
CALL_SUBTEST(check_all_in_range<short>(24345,24345+short_offset));
|
||||
|
||||
CALL_SUBTEST(check_all_in_range<int>(11,11));
|
||||
CALL_SUBTEST(check_all_in_range<int>(11,11+g_repeat));
|
||||
CALL_SUBTEST(check_all_in_range<int>(-5,5));
|
||||
CALL_SUBTEST(check_all_in_range<int>(-11-g_repeat,-11));
|
||||
CALL_SUBTEST(check_all_in_range<int>(-673456,-673456+g_repeat));
|
||||
CALL_SUBTEST(check_all_in_range<int>(673456,673456+g_repeat));
|
||||
|
||||
CALL_SUBTEST(check_all_in_range<long>(11,11));
|
||||
CALL_SUBTEST(check_all_in_range<long>(11,11+g_repeat));
|
||||
CALL_SUBTEST(check_all_in_range<long>(-5,5));
|
||||
CALL_SUBTEST(check_all_in_range<long>(-11-g_repeat,-11));
|
||||
CALL_SUBTEST(check_all_in_range<long>(-long_ref,-long_ref+g_repeat));
|
||||
CALL_SUBTEST(check_all_in_range<long>( long_ref, long_ref+g_repeat));
|
||||
}
|
@ -25,6 +25,22 @@ template<typename MatrixType> void real_qz(const MatrixType& m)
|
||||
MatrixType A = MatrixType::Random(dim,dim),
|
||||
B = MatrixType::Random(dim,dim);
|
||||
|
||||
|
||||
// Regression test for bug 985: Randomly set rows or columns to zero
|
||||
Index k=internal::random<Index>(0, dim-1);
|
||||
switch(internal::random<int>(0,10)) {
|
||||
case 0:
|
||||
A.row(k).setZero(); break;
|
||||
case 1:
|
||||
A.col(k).setZero(); break;
|
||||
case 2:
|
||||
B.row(k).setZero(); break;
|
||||
case 3:
|
||||
B.col(k).setZero(); break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
RealQZ<MatrixType> qz(A,B);
|
||||
|
||||
VERIFY_IS_EQUAL(qz.info(), Success);
|
||||
|
@ -58,48 +58,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
|
||||
|
||||
VERIFY_IS_APPROX(m, refMat);
|
||||
|
||||
// test InnerIterators and Block expressions
|
||||
for (Index t=0; t<10; ++t)
|
||||
{
|
||||
Index j = internal::random<Index>(0,cols-1);
|
||||
Index i = internal::random<Index>(0,rows-1);
|
||||
Index w = internal::random<Index>(1,cols-j-1);
|
||||
Index h = internal::random<Index>(1,rows-i-1);
|
||||
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
|
||||
for(Index c=0; c<w; c++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c), refMat.block(i,j,h,w).col(c));
|
||||
for(Index r=0; r<h; r++)
|
||||
{
|
||||
// FIXME col().coeff() not implemented yet
|
||||
// VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
|
||||
}
|
||||
}
|
||||
for(Index r=0; r<h; r++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
|
||||
for(Index c=0; c<w; c++)
|
||||
{
|
||||
// FIXME row().coeff() not implemented yet
|
||||
// VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(Index c=0; c<cols; c++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.col(c) + m.col(c), (m + m).col(c));
|
||||
VERIFY_IS_APPROX(m.col(c) + m.col(c), refMat.col(c) + refMat.col(c));
|
||||
}
|
||||
|
||||
for(Index r=0; r<rows; r++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.row(r) + m.row(r), (m + m).row(r));
|
||||
VERIFY_IS_APPROX(m.row(r) + m.row(r), refMat.row(r) + refMat.row(r));
|
||||
}
|
||||
|
||||
|
||||
// test assertion
|
||||
VERIFY_RAISES_ASSERT( m.coeffRef(-1,1) = 0 );
|
||||
VERIFY_RAISES_ASSERT( m.coeffRef(0,m.cols()) = 0 );
|
||||
@ -184,82 +142,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
|
||||
VERIFY_IS_APPROX(m2,m1);
|
||||
}
|
||||
|
||||
// test innerVector()
|
||||
{
|
||||
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
|
||||
SparseMatrixType m2(rows, cols);
|
||||
initSparse<Scalar>(density, refMat2, m2);
|
||||
Index j0 = internal::random<Index>(0,outer-1);
|
||||
Index j1 = internal::random<Index>(0,outer-1);
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0));
|
||||
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1));
|
||||
|
||||
SparseMatrixType m3(rows,cols);
|
||||
m3.reserve(VectorXi::Constant(outer,int(inner/2)));
|
||||
for(Index j=0; j<outer; ++j)
|
||||
for(Index k=0; k<(std::min)(j,inner); ++k)
|
||||
m3.insertByOuterInner(j,k) = k+1;
|
||||
for(Index j=0; j<(std::min)(outer, inner); ++j)
|
||||
{
|
||||
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
|
||||
if(j>0)
|
||||
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
|
||||
}
|
||||
m3.makeCompressed();
|
||||
for(Index j=0; j<(std::min)(outer, inner); ++j)
|
||||
{
|
||||
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
|
||||
if(j>0)
|
||||
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
|
||||
}
|
||||
|
||||
VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros());
|
||||
|
||||
// m2.innerVector(j0) = 2*m2.innerVector(j1);
|
||||
// refMat2.col(j0) = 2*refMat2.col(j1);
|
||||
// VERIFY_IS_APPROX(m2, refMat2);
|
||||
}
|
||||
|
||||
// test innerVectors()
|
||||
{
|
||||
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
|
||||
SparseMatrixType m2(rows, cols);
|
||||
initSparse<Scalar>(density, refMat2, m2);
|
||||
if(internal::random<float>(0,1)>0.5) m2.makeCompressed();
|
||||
Index j0 = internal::random<Index>(0,outer-2);
|
||||
Index j1 = internal::random<Index>(0,outer-2);
|
||||
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0));
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
|
||||
refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
|
||||
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
|
||||
|
||||
VERIFY_IS_APPROX(m2, refMat2);
|
||||
|
||||
VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros());
|
||||
|
||||
m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval();
|
||||
else
|
||||
refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval();
|
||||
|
||||
VERIFY_IS_APPROX(m2, refMat2);
|
||||
}
|
||||
|
||||
// test basic computations
|
||||
{
|
||||
DenseMatrix refM1 = DenseMatrix::Zero(rows, cols);
|
||||
@ -330,40 +212,6 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
|
||||
VERIFY(m2.isApprox(m3));
|
||||
}
|
||||
|
||||
|
||||
|
||||
// test generic blocks
|
||||
{
|
||||
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
|
||||
SparseMatrixType m2(rows, cols);
|
||||
initSparse<Scalar>(density, refMat2, m2);
|
||||
Index j0 = internal::random<Index>(0,outer-2);
|
||||
Index j1 = internal::random<Index>(0,outer-2);
|
||||
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0));
|
||||
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols),
|
||||
refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0),
|
||||
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
|
||||
|
||||
Index i = internal::random<Index>(0,m2.outerSize()-1);
|
||||
if(SparseMatrixType::IsRowMajor) {
|
||||
m2.innerVector(i) = m2.innerVector(i) * s1;
|
||||
refMat2.row(i) = refMat2.row(i) * s1;
|
||||
VERIFY_IS_APPROX(m2,refMat2);
|
||||
} else {
|
||||
m2.innerVector(i) = m2.innerVector(i) * s1;
|
||||
refMat2.col(i) = refMat2.col(i) * s1;
|
||||
VERIFY_IS_APPROX(m2,refMat2);
|
||||
}
|
||||
}
|
||||
|
||||
// test prune
|
||||
{
|
||||
SparseMatrixType m2(rows, cols);
|
||||
@ -602,8 +450,8 @@ void test_sparse_basic()
|
||||
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, ColMajor>(r, c)) ));
|
||||
CALL_SUBTEST_2(( sparse_basic(SparseMatrix<std::complex<double>, RowMajor>(r, c)) ));
|
||||
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double>(r, c)) ));
|
||||
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,long int>(r, c)) ));
|
||||
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,long int>(r, c)) ));
|
||||
CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,ColMajor,long int>(r, c)) ));
|
||||
CALL_SUBTEST_5(( sparse_basic(SparseMatrix<double,RowMajor,long int>(r, c)) ));
|
||||
|
||||
r = Eigen::internal::random<int>(1,100);
|
||||
c = Eigen::internal::random<int>(1,100);
|
||||
@ -611,8 +459,8 @@ void test_sparse_basic()
|
||||
r = c; // check square matrices in 25% of tries
|
||||
}
|
||||
|
||||
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
|
||||
CALL_SUBTEST_1(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
|
||||
CALL_SUBTEST_6(( sparse_basic(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
|
||||
CALL_SUBTEST_6(( sparse_basic(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
|
||||
}
|
||||
|
||||
// Regression test for bug 900: (manually insert higher values here, if you have enough RAM):
|
||||
|
254
test/sparse_block.cpp
Normal file
@ -0,0 +1,254 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include "sparse.h"
|
||||
|
||||
template<typename SparseMatrixType> void sparse_block(const SparseMatrixType& ref)
|
||||
{
|
||||
const Index rows = ref.rows();
|
||||
const Index cols = ref.cols();
|
||||
const Index inner = ref.innerSize();
|
||||
const Index outer = ref.outerSize();
|
||||
|
||||
typedef typename SparseMatrixType::Scalar Scalar;
|
||||
|
||||
double density = (std::max)(8./(rows*cols), 0.01);
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
|
||||
typedef Matrix<Scalar,Dynamic,1> DenseVector;
|
||||
typedef Matrix<Scalar,1,Dynamic> RowDenseVector;
|
||||
|
||||
Scalar s1 = internal::random<Scalar>();
|
||||
{
|
||||
SparseMatrixType m(rows, cols);
|
||||
DenseMatrix refMat = DenseMatrix::Zero(rows, cols);
|
||||
initSparse<Scalar>(density, refMat, m);
|
||||
|
||||
VERIFY_IS_APPROX(m, refMat);
|
||||
|
||||
// test InnerIterators and Block expressions
|
||||
for (int t=0; t<10; ++t)
|
||||
{
|
||||
Index j = internal::random<Index>(0,cols-2);
|
||||
Index i = internal::random<Index>(0,rows-2);
|
||||
Index w = internal::random<Index>(1,cols-j);
|
||||
Index h = internal::random<Index>(1,rows-i);
|
||||
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
|
||||
for(Index c=0; c<w; c++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c), refMat.block(i,j,h,w).col(c));
|
||||
for(Index r=0; r<h; r++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w).coeff(r,c), refMat.block(i,j,h,w).coeff(r,c));
|
||||
}
|
||||
}
|
||||
for(Index r=0; r<h; r++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
|
||||
for(Index c=0; c<w; c++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
|
||||
VERIFY_IS_APPROX(m.block(i,j,h,w).coeff(r,c), refMat.block(i,j,h,w).coeff(r,c));
|
||||
}
|
||||
}
|
||||
|
||||
VERIFY_IS_APPROX(m.middleCols(j,w), refMat.middleCols(j,w));
|
||||
VERIFY_IS_APPROX(m.middleRows(i,h), refMat.middleRows(i,h));
|
||||
for(Index r=0; r<h; r++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.middleCols(j,w).row(r), refMat.middleCols(j,w).row(r));
|
||||
VERIFY_IS_APPROX(m.middleRows(i,h).row(r), refMat.middleRows(i,h).row(r));
|
||||
for(Index c=0; c<w; c++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.col(c).coeff(r), refMat.col(c).coeff(r));
|
||||
VERIFY_IS_APPROX(m.row(r).coeff(c), refMat.row(r).coeff(c));
|
||||
|
||||
VERIFY_IS_APPROX(m.middleCols(j,w).coeff(r,c), refMat.middleCols(j,w).coeff(r,c));
|
||||
VERIFY_IS_APPROX(m.middleRows(i,h).coeff(r,c), refMat.middleRows(i,h).coeff(r,c));
|
||||
if(m.middleCols(j,w).coeff(r,c) != Scalar(0))
|
||||
{
|
||||
VERIFY_IS_APPROX(m.middleCols(j,w).coeffRef(r,c), refMat.middleCols(j,w).coeff(r,c));
|
||||
}
|
||||
if(m.middleRows(i,h).coeff(r,c) != Scalar(0))
|
||||
{
|
||||
VERIFY_IS_APPROX(m.middleRows(i,h).coeff(r,c), refMat.middleRows(i,h).coeff(r,c));
|
||||
}
|
||||
}
|
||||
}
|
||||
for(Index c=0; c<w; c++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.middleCols(j,w).col(c), refMat.middleCols(j,w).col(c));
|
||||
VERIFY_IS_APPROX(m.middleRows(i,h).col(c), refMat.middleRows(i,h).col(c));
|
||||
}
|
||||
}
|
||||
|
||||
for(Index c=0; c<cols; c++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.col(c) + m.col(c), (m + m).col(c));
|
||||
VERIFY_IS_APPROX(m.col(c) + m.col(c), refMat.col(c) + refMat.col(c));
|
||||
}
|
||||
|
||||
for(Index r=0; r<rows; r++)
|
||||
{
|
||||
VERIFY_IS_APPROX(m.row(r) + m.row(r), (m + m).row(r));
|
||||
VERIFY_IS_APPROX(m.row(r) + m.row(r), refMat.row(r) + refMat.row(r));
|
||||
}
|
||||
}
|
||||
|
||||
// test innerVector()
|
||||
{
|
||||
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
|
||||
SparseMatrixType m2(rows, cols);
|
||||
initSparse<Scalar>(density, refMat2, m2);
|
||||
Index j0 = internal::random<Index>(0,outer-1);
|
||||
Index j1 = internal::random<Index>(0,outer-1);
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0));
|
||||
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1));
|
||||
|
||||
SparseMatrixType m3(rows,cols);
|
||||
m3.reserve(VectorXi::Constant(outer,int(inner/2)));
|
||||
for(Index j=0; j<outer; ++j)
|
||||
for(Index k=0; k<(std::min)(j,inner); ++k)
|
||||
m3.insertByOuterInner(j,k) = k+1;
|
||||
for(Index j=0; j<(std::min)(outer, inner); ++j)
|
||||
{
|
||||
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
|
||||
if(j>0)
|
||||
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
|
||||
}
|
||||
m3.makeCompressed();
|
||||
for(Index j=0; j<(std::min)(outer, inner); ++j)
|
||||
{
|
||||
VERIFY(j==numext::real(m3.innerVector(j).nonZeros()));
|
||||
if(j>0)
|
||||
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff()));
|
||||
}
|
||||
|
||||
VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros());
|
||||
|
||||
// m2.innerVector(j0) = 2*m2.innerVector(j1);
|
||||
// refMat2.col(j0) = 2*refMat2.col(j1);
|
||||
// VERIFY_IS_APPROX(m2, refMat2);
|
||||
}
|
||||
|
||||
// test innerVectors()
|
||||
{
|
||||
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
|
||||
SparseMatrixType m2(rows, cols);
|
||||
initSparse<Scalar>(density, refMat2, m2);
|
||||
if(internal::random<float>(0,1)>0.5) m2.makeCompressed();
|
||||
Index j0 = internal::random<Index>(0,outer-2);
|
||||
Index j1 = internal::random<Index>(0,outer-2);
|
||||
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0));
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
|
||||
refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
|
||||
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
|
||||
|
||||
VERIFY_IS_APPROX(m2, refMat2);
|
||||
|
||||
VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros());
|
||||
|
||||
m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval();
|
||||
else
|
||||
refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval();
|
||||
|
||||
VERIFY_IS_APPROX(m2, refMat2);
|
||||
}
|
||||
|
||||
// test generic blocks
|
||||
{
|
||||
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
|
||||
SparseMatrixType m2(rows, cols);
|
||||
initSparse<Scalar>(density, refMat2, m2);
|
||||
Index j0 = internal::random<Index>(0,outer-2);
|
||||
Index j1 = internal::random<Index>(0,outer-2);
|
||||
Index n0 = internal::random<Index>(1,outer-(std::max)(j0,j1));
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0));
|
||||
|
||||
if(SparseMatrixType::IsRowMajor)
|
||||
VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols),
|
||||
refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols));
|
||||
else
|
||||
VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0),
|
||||
refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
|
||||
|
||||
Index i = internal::random<Index>(0,m2.outerSize()-1);
|
||||
if(SparseMatrixType::IsRowMajor) {
|
||||
m2.innerVector(i) = m2.innerVector(i) * s1;
|
||||
refMat2.row(i) = refMat2.row(i) * s1;
|
||||
VERIFY_IS_APPROX(m2,refMat2);
|
||||
} else {
|
||||
m2.innerVector(i) = m2.innerVector(i) * s1;
|
||||
refMat2.col(i) = refMat2.col(i) * s1;
|
||||
VERIFY_IS_APPROX(m2,refMat2);
|
||||
}
|
||||
|
||||
Index r0 = internal::random<Index>(0,rows-2);
|
||||
Index c0 = internal::random<Index>(0,cols-2);
|
||||
Index r1 = internal::random<Index>(1,rows-r0);
|
||||
Index c1 = internal::random<Index>(1,cols-c0);
|
||||
|
||||
VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0));
|
||||
VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0));
|
||||
|
||||
VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0));
|
||||
VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0));
|
||||
|
||||
VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1));
|
||||
VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1));
|
||||
}
|
||||
}
|
||||
|
||||
void test_sparse_block()
|
||||
{
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
int r = Eigen::internal::random<int>(1,200), c = Eigen::internal::random<int>(1,200);
|
||||
if(Eigen::internal::random<int>(0,4) == 0) {
|
||||
r = c; // check square matrices in 25% of tries
|
||||
}
|
||||
EIGEN_UNUSED_VARIABLE(r+c);
|
||||
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(1, 1)) ));
|
||||
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(8, 8)) ));
|
||||
CALL_SUBTEST_1(( sparse_block(SparseMatrix<double>(r, c)) ));
|
||||
CALL_SUBTEST_2(( sparse_block(SparseMatrix<std::complex<double>, ColMajor>(r, c)) ));
|
||||
CALL_SUBTEST_2(( sparse_block(SparseMatrix<std::complex<double>, RowMajor>(r, c)) ));
|
||||
|
||||
CALL_SUBTEST_3(( sparse_block(SparseMatrix<double,ColMajor,long int>(r, c)) ));
|
||||
CALL_SUBTEST_3(( sparse_block(SparseMatrix<double,RowMajor,long int>(r, c)) ));
|
||||
|
||||
r = Eigen::internal::random<int>(1,100);
|
||||
c = Eigen::internal::random<int>(1,100);
|
||||
if(Eigen::internal::random<int>(0,4) == 0) {
|
||||
r = c; // check square matrices in 25% of tries
|
||||
}
|
||||
|
||||
CALL_SUBTEST_4(( sparse_block(SparseMatrix<double,ColMajor,short int>(short(r), short(c))) ));
|
||||
CALL_SUBTEST_4(( sparse_block(SparseMatrix<double,RowMajor,short int>(short(r), short(c))) ));
|
||||
}
|
||||
}
|
@ -67,6 +67,9 @@ template<typename SparseMatrixType> void sparse_product()
|
||||
VERIFY_IS_APPROX(m4 = m2*m3/s1, refMat4 = refMat2*refMat3/s1);
|
||||
VERIFY_IS_APPROX(m4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1);
|
||||
VERIFY_IS_APPROX(m4 = s2*m2*m3*s1, refMat4 = s2*refMat2*refMat3*s1);
|
||||
VERIFY_IS_APPROX(m4 = (m2+m2)*m3, refMat4 = (refMat2+refMat2)*refMat3);
|
||||
VERIFY_IS_APPROX(m4 = m2*m3.leftCols(cols/2), refMat4 = refMat2*refMat3.leftCols(cols/2));
|
||||
VERIFY_IS_APPROX(m4 = m2*(m3+m3).leftCols(cols/2), refMat4 = refMat2*(refMat3+refMat3).leftCols(cols/2));
|
||||
|
||||
VERIFY_IS_APPROX(m4=(m2*m3).pruned(0), refMat4=refMat2*refMat3);
|
||||
VERIFY_IS_APPROX(m4=(m2t.transpose()*m3).pruned(0), refMat4=refMat2t.transpose()*refMat3);
|
||||
@ -194,7 +197,7 @@ template<typename SparseMatrixType> void sparse_product()
|
||||
VERIFY_IS_APPROX(d3=d1*m2.transpose(), refM3=d1*refM2.transpose());
|
||||
}
|
||||
|
||||
// test self-adjoint and traingular-view products
|
||||
// test self-adjoint and triangular-view products
|
||||
{
|
||||
DenseMatrix b = DenseMatrix::Random(rows, rows);
|
||||
DenseMatrix x = DenseMatrix::Random(rows, rows);
|
||||
|
@ -49,18 +49,39 @@ void svd_compare_to_full(const MatrixType& m,
|
||||
unsigned int computationOptions,
|
||||
const SvdType& referenceSvd)
|
||||
{
|
||||
typedef typename MatrixType::Index Index;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
Index rows = m.rows();
|
||||
Index cols = m.cols();
|
||||
Index diagSize = (std::min)(rows, cols);
|
||||
RealScalar prec = test_precision<RealScalar>();
|
||||
|
||||
SvdType svd(m, computationOptions);
|
||||
|
||||
VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
|
||||
|
||||
if(computationOptions & (ComputeFullV|ComputeThinV))
|
||||
{
|
||||
VERIFY( (svd.matrixV().adjoint()*svd.matrixV()).isIdentity(prec) );
|
||||
VERIFY_IS_APPROX( svd.matrixV().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint(),
|
||||
referenceSvd.matrixV().leftCols(diagSize) * referenceSvd.singularValues().asDiagonal() * referenceSvd.matrixV().leftCols(diagSize).adjoint());
|
||||
}
|
||||
|
||||
if(computationOptions & (ComputeFullU|ComputeThinU))
|
||||
{
|
||||
VERIFY( (svd.matrixU().adjoint()*svd.matrixU()).isIdentity(prec) );
|
||||
VERIFY_IS_APPROX( svd.matrixU().leftCols(diagSize) * svd.singularValues().cwiseAbs2().asDiagonal() * svd.matrixU().leftCols(diagSize).adjoint(),
|
||||
referenceSvd.matrixU().leftCols(diagSize) * referenceSvd.singularValues().cwiseAbs2().asDiagonal() * referenceSvd.matrixU().leftCols(diagSize).adjoint());
|
||||
}
|
||||
|
||||
// The following checks are not critical.
// For instance, with Divide & Conquer SVD, if only the factor 'V' is computed then a different matrix-matrix product implementation will be used
// and the resulting 'V' factor might be significantly different when the SVD decomposition is not unique, especially with single precision float.
|
||||
++g_test_level;
|
||||
if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
|
||||
if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
|
||||
if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
|
||||
if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV().cwiseAbs(), referenceSvd.matrixV().cwiseAbs());
|
||||
if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
|
||||
--g_test_level;
|
||||
}
|
||||
|
||||
//
|
||||
@ -85,33 +106,48 @@ void svd_least_square(const MatrixType& m, unsigned int computationOptions)
|
||||
SvdType svd(m, computationOptions);
|
||||
|
||||
if(internal::is_same<RealScalar,double>::value) svd.setThreshold(1e-8);
|
||||
else if(internal::is_same<RealScalar,float>::value) svd.setThreshold(1e-4);
|
||||
|
||||
else if(internal::is_same<RealScalar,float>::value) svd.setThreshold(2e-4);
|
||||
|
||||
SolutionType x = svd.solve(rhs);
|
||||
|
||||
// evaluate normal equation which works also for least-squares solutions
|
||||
if(internal::is_same<RealScalar,double>::value || svd.rank()==m.diagonal().size())
|
||||
{
|
||||
// This test is not stable with single precision.
|
||||
// This is probably because squaring m significantly affects the precision.
|
||||
VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs);
|
||||
}
|
||||
|
||||
|
||||
RealScalar residual = (m*x-rhs).norm();
|
||||
// Check that there is no significantly better solution in the neighborhood of x
|
||||
RealScalar rhs_norm = rhs.norm();
|
||||
if(!test_isMuchSmallerThan(residual,rhs.norm()))
|
||||
{
|
||||
// ^^^ If the residual is very small, then we have an exact solution, so we are already good.
|
||||
|
||||
// evaluate normal equation which works also for least-squares solutions
|
||||
if(internal::is_same<RealScalar,double>::value || svd.rank()==m.diagonal().size())
|
||||
{
|
||||
using std::sqrt;
|
||||
// This test is not stable with single precision.
|
||||
// This is probably because squaring m significantly affects the precision.
|
||||
if(internal::is_same<RealScalar,float>::value) ++g_test_level;
|
||||
|
||||
VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs);
|
||||
|
||||
if(internal::is_same<RealScalar,float>::value) --g_test_level;
|
||||
}
|
||||
|
||||
// Check that there is no significantly better solution in the neighborhood of x
|
||||
for(Index k=0;k<x.rows();++k)
|
||||
{
|
||||
using std::abs;
|
||||
|
||||
SolutionType y(x);
|
||||
y.row(k) = (1.+2*NumTraits<RealScalar>::epsilon())*x.row(k);
|
||||
RealScalar residual_y = (m*y-rhs).norm();
|
||||
VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y );
|
||||
if(internal::is_same<RealScalar,float>::value) ++g_test_level;
|
||||
VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
|
||||
if(internal::is_same<RealScalar,float>::value) --g_test_level;
|
||||
|
||||
y.row(k) = (1.-2*NumTraits<RealScalar>::epsilon())*x.row(k);
|
||||
residual_y = (m*y-rhs).norm();
|
||||
VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y );
|
||||
if(internal::is_same<RealScalar,float>::value) ++g_test_level;
|
||||
VERIFY( test_isApprox(residual_y,residual) || residual < residual_y );
|
||||
if(internal::is_same<RealScalar,float>::value) --g_test_level;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
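Stated as a formula, the neighborhood check in the least-squares hunk above perturbs one row of the computed solution x at a time and verifies that no such perturbation yields a noticeably smaller residual. With epsilon = NumTraits<RealScalar>::epsilon(), the loop requires, for every row index k,

\[
  y = x \ \text{except}\ y_{k,:} = (1 \pm 2\varepsilon)\, x_{k,:}, \qquad
  \| m\,y - \mathrm{rhs} \| \approx \| m\,x - \mathrm{rhs} \|
  \quad\text{or}\quad
  \| m\,x - \mathrm{rhs} \| < \| m\,y - \mathrm{rhs} \| .
\]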
@ -82,8 +82,10 @@ template<typename MatrixType> void swap(const MatrixType& m)
|
||||
|
||||
void test_swap()
|
||||
{
|
||||
int s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
|
||||
CALL_SUBTEST_1( swap(Matrix3f()) ); // fixed size, no vectorization
|
||||
CALL_SUBTEST_2( swap(Matrix4d()) ); // fixed size, possible vectorization
|
||||
CALL_SUBTEST_3( swap(MatrixXd(3,3)) ); // dyn size, no vectorization
|
||||
CALL_SUBTEST_4( swap(MatrixXf(30,30)) ); // dyn size, possible vectorization
|
||||
CALL_SUBTEST_3( swap(MatrixXd(s,s)) ); // dyn size, no vectorization
|
||||
CALL_SUBTEST_4( swap(MatrixXf(s,s)) ); // dyn size, possible vectorization
|
||||
TEST_SET_BUT_UNUSED_VARIABLE(s)
|
||||
}
|
||||
|
@ -9,7 +9,17 @@
|
||||
|
||||
#include "main.h"
|
||||
|
||||
typedef Matrix<float,8,1> Vector8f;
|
||||
typedef Matrix<float, 6,1> Vector6f;
|
||||
typedef Matrix<float, 8,1> Vector8f;
|
||||
typedef Matrix<float, 12,1> Vector12f;
|
||||
|
||||
typedef Matrix<double, 5,1> Vector5d;
|
||||
typedef Matrix<double, 6,1> Vector6d;
|
||||
typedef Matrix<double, 7,1> Vector7d;
|
||||
typedef Matrix<double, 8,1> Vector8d;
|
||||
typedef Matrix<double, 9,1> Vector9d;
|
||||
typedef Matrix<double,10,1> Vector10d;
|
||||
typedef Matrix<double,12,1> Vector12d;
|
||||
|
||||
struct TestNew1
|
||||
{
|
||||
@ -81,10 +91,13 @@ void construct_at_boundary(int boundary)
|
||||
|
||||
void unalignedassert()
|
||||
{
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
construct_at_boundary<Vector2f>(4);
|
||||
construct_at_boundary<Vector3f>(4);
|
||||
construct_at_boundary<Vector4f>(16);
|
||||
construct_at_boundary<Vector6f>(4);
|
||||
construct_at_boundary<Vector8f>(EIGEN_ALIGN_BYTES);
|
||||
construct_at_boundary<Vector12f>(16);
|
||||
construct_at_boundary<Matrix2f>(16);
|
||||
construct_at_boundary<Matrix3f>(4);
|
||||
construct_at_boundary<Matrix4f>(EIGEN_ALIGN_BYTES);
|
||||
@ -92,6 +105,13 @@ void unalignedassert()
|
||||
construct_at_boundary<Vector2d>(16);
|
||||
construct_at_boundary<Vector3d>(4);
|
||||
construct_at_boundary<Vector4d>(EIGEN_ALIGN_BYTES);
|
||||
construct_at_boundary<Vector5d>(4);
|
||||
construct_at_boundary<Vector6d>(16);
|
||||
construct_at_boundary<Vector7d>(4);
|
||||
construct_at_boundary<Vector8d>(EIGEN_ALIGN_BYTES);
|
||||
construct_at_boundary<Vector9d>(4);
|
||||
construct_at_boundary<Vector10d>(16);
|
||||
construct_at_boundary<Vector12d>(EIGEN_ALIGN_BYTES);
|
||||
construct_at_boundary<Matrix2d>(EIGEN_ALIGN_BYTES);
|
||||
construct_at_boundary<Matrix3d>(4);
|
||||
construct_at_boundary<Matrix4d>(EIGEN_ALIGN_BYTES);
|
||||
@ -100,7 +120,7 @@ void unalignedassert()
|
||||
construct_at_boundary<Vector3cf>(4);
|
||||
construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES);
|
||||
construct_at_boundary<Vector3cd>(16);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
check_unalignedassert_good<TestNew1>();
|
||||
check_unalignedassert_good<TestNew2>();
|
||||
@ -112,11 +132,19 @@ void unalignedassert()
|
||||
check_unalignedassert_good<Depends<true> >();
|
||||
|
||||
#if EIGEN_ALIGN_STATICALLY
|
||||
if(EIGEN_ALIGN_BYTES==16)
|
||||
if(EIGEN_ALIGN_BYTES>=16)
|
||||
{
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12f>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector6d>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8d>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector10d>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector12d>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
|
||||
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
|
||||
}
|
||||
for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8)
|
||||
{
|
||||
|
@ -214,7 +214,7 @@ template<typename Scalar, bool Enable = internal::packet_traits<Scalar>::Vectori
|
||||
>(DefaultTraversal,CompleteUnrolling)));
|
||||
|
||||
VERIFY((test_assign(Matrix11(), Matrix<Scalar,PacketSize,EIGEN_PLAIN_ENUM_MIN(2,PacketSize)>()*Matrix<Scalar,EIGEN_PLAIN_ENUM_MIN(2,PacketSize),PacketSize>(),
|
||||
PacketSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD?DefaultTraversal:InnerVectorizedTraversal, CompleteUnrolling)));
|
||||
InnerVectorizedTraversal, CompleteUnrolling)));
|
||||
#endif
|
||||
|
||||
VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3),
|
||||
|
@ -49,8 +49,8 @@
|
||||
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h"
|
||||
@ -80,8 +80,8 @@
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h"
|
||||
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"
|
||||
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h"
|
||||
|
@ -266,16 +266,16 @@ array<t, n> repeat(t v) {
|
||||
}
|
||||
|
||||
template<std::size_t I, class Head, class Tail>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>& a) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>&) {
|
||||
return get<I, type_list<Head, Tail> >::value;
|
||||
}
|
||||
template<std::size_t I, class Head, class Tail>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>& a) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>&) {
|
||||
return get<I, type_list<Head, Tail> >::value;
|
||||
}
|
||||
|
||||
template <class NList>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList& l) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList&) {
|
||||
return arg_prod<NList>::value;
|
||||
};
|
||||
|
||||
|
@ -1157,7 +1157,41 @@ in TensorFunctors.h for information on how to implement a reduction operator.

## Convolutions

TBD: convolve(const KernelDerived& kernel, const Dimensions& dims)
### <Operation> convolve(const Kernel& kernel, const Dimensions& dims)

Returns a tensor that is the output of the convolution of the input tensor with the kernel,
along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor
which were part of the convolution will be reduced by the formula:
output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size).
The dimension sizes for dimensions that were not part of the convolution will remain the same.
Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the
convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is
for the last dimension).

    // Compute convolution along the second and third dimension.
    Tensor<float, 4, DataLayout> input(3, 3, 7, 11);
    Tensor<float, 2, DataLayout> kernel(2, 2);
    Tensor<float, 4, DataLayout> output(3, 2, 6, 11);
    input.setRandom();
    kernel.setRandom();

    Eigen::array<ptrdiff_t, 2> dims({1, 2});  // Specify second and third dimension for convolution.
    output = input.convolve(kernel, dims);

    for (int i = 0; i < 3; ++i) {
      for (int j = 0; j < 2; ++j) {
        for (int k = 0; k < 6; ++k) {
          for (int l = 0; l < 11; ++l) {
            const float result = output(i,j,k,l);
            const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
                                   input(i,j+1,k+0,l) * kernel(1,0) +
                                   input(i,j+0,k+1,l) * kernel(0,1) +
                                   input(i,j+1,k+1,l) * kernel(1,1);
            VERIFY_IS_APPROX(result, expected);
          }
        }
      }
    }

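For the example above, the declared output shape follows directly from the formula output_dim_size = input_dim_size - kernel_dim_size + 1: the two convolved dimensions shrink from 3 to 3 - 2 + 1 = 2 and from 7 to 7 - 2 + 1 = 6, while the non-convolved dimensions keep their sizes 3 and 11, giving the output shape (3, 2, 6, 11).
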
## Geometrical Operations
|
||||
|
@ -520,48 +520,101 @@ class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyA
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorLayoutSwapOp<Derived>
|
||||
const TensorLayoutSwapOp<const Derived>
|
||||
swap_layout() const {
|
||||
return TensorLayoutSwapOp<const Derived>(derived());
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorLayoutSwapOp<Derived>
|
||||
swap_layout() {
|
||||
return TensorLayoutSwapOp<Derived>(derived());
|
||||
}
|
||||
|
||||
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorConcatenationOp<const Axis, const Derived, const OtherDerived>
|
||||
concatenate(const OtherDerived& other, const Axis& axis) const {
|
||||
return TensorConcatenationOp<const Axis, const Derived, const OtherDerived>(derived(), other, axis);
|
||||
}
|
||||
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorConcatenationOp<const Axis, Derived, OtherDerived>
|
||||
concatenate(const OtherDerived& other, const Axis& axis) const {
|
||||
return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other.derived(), axis);
|
||||
concatenate(const OtherDerived& other, const Axis& axis) {
|
||||
return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other, axis);
|
||||
}
|
||||
|
||||
template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorReshapingOp<const NewDimensions, const Derived>
|
||||
reshape(const NewDimensions& newDimensions) const {
|
||||
return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions);
|
||||
}
|
||||
template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorReshapingOp<const NewDimensions, Derived>
|
||||
reshape(const NewDimensions& newDimensions) const {
|
||||
reshape(const NewDimensions& newDimensions) {
|
||||
return TensorReshapingOp<const NewDimensions, Derived>(derived(), newDimensions);
|
||||
}
|
||||
|
||||
template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorSlicingOp<const StartIndices, const Sizes, const Derived>
|
||||
slice(const StartIndices& startIndices, const Sizes& sizes) const {
|
||||
return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes);
|
||||
}
|
||||
template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorSlicingOp<const StartIndices, const Sizes, Derived>
|
||||
slice(const StartIndices& startIndices, const Sizes& sizes) const {
|
||||
slice(const StartIndices& startIndices, const Sizes& sizes) {
|
||||
return TensorSlicingOp<const StartIndices, const Sizes, Derived>(derived(), startIndices, sizes);
|
||||
}
|
||||
|
||||
template <DenseIndex DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorChippingOp<DimId, Derived>
|
||||
const TensorChippingOp<DimId, const Derived>
|
||||
chip(const Index offset) const {
|
||||
return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId);
|
||||
}
|
||||
template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorChippingOp<DimId, Derived>
|
||||
chip(const Index offset) {
|
||||
return TensorChippingOp<DimId, Derived>(derived(), offset, DimId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorChippingOp<Dynamic, const Derived>
|
||||
chip(const Index offset, const Index dim) const {
|
||||
return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorChippingOp<Dynamic, Derived>
|
||||
chip(const Index offset, const Index dim) const {
|
||||
chip(const Index offset, const Index dim) {
|
||||
return TensorChippingOp<Dynamic, Derived>(derived(), offset, dim);
|
||||
}
|
||||
|
||||
template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorReverseOp<const ReverseDimensions, const Derived>
|
||||
reverse(const ReverseDimensions& rev) const {
|
||||
return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev);
|
||||
}
|
||||
template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorReverseOp<const ReverseDimensions, Derived>
|
||||
reverse(const ReverseDimensions& rev) const {
|
||||
reverse(const ReverseDimensions& rev) {
|
||||
return TensorReverseOp<const ReverseDimensions, Derived>(derived(), rev);
|
||||
}
|
||||
|
||||
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorShufflingOp<const Shuffle, const Derived>
|
||||
shuffle(const Shuffle& shuffle) const {
|
||||
return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
|
||||
}
|
||||
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorShufflingOp<const Shuffle, Derived>
|
||||
shuffle(const Shuffle& shuffle) const {
|
||||
shuffle(const Shuffle& shuffle) {
|
||||
return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
|
||||
}
|
||||
|
||||
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorStridingOp<const Strides, const Derived>
|
||||
stride(const Strides& strides) const {
|
||||
return TensorStridingOp<const Strides, const Derived>(derived(), strides);
|
||||
}
|
||||
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorStridingOp<const Strides, Derived>
|
||||
stride(const Strides& strides) const {
|
||||
stride(const Strides& strides) {
|
||||
return TensorStridingOp<const Strides, Derived>(derived(), strides);
|
||||
}
|
||||
|
||||
|
@ -157,6 +157,8 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
|
||||
eigen_assert(NumInputDims > m_dim.actualDim());
|
||||
|
||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||
eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);
|
||||
|
||||
int j = 0;
|
||||
for (int i = 0; i < NumInputDims; ++i) {
|
||||
if (i != m_dim.actualDim()) {
|
||||
@ -246,7 +248,9 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
|
||||
Scalar* result = m_impl.data();
|
||||
if (m_dim.actualDim() == NumDims && result) {
|
||||
if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
|
||||
(static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
|
||||
result) {
|
||||
return result + m_inputOffset;
|
||||
} else {
|
||||
return NULL;
|
||||
|
@ -21,8 +21,8 @@ namespace Eigen {
|
||||
*/
|
||||
namespace internal {
|
||||
|
||||
|
||||
template <typename Index, typename InputDims, size_t NumKernelDims> class IndexMapper {
|
||||
template <typename Index, typename InputDims, size_t NumKernelDims, int Layout>
|
||||
class IndexMapper {
|
||||
public:
|
||||
IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
|
||||
const array<Index, NumKernelDims>& indices) {
|
||||
@ -38,13 +38,19 @@ template <typename Index, typename InputDims, size_t NumKernelDims> class IndexM
|
||||
|
||||
array<Index, NumDims> inputStrides;
|
||||
array<Index, NumDims> outputStrides;
|
||||
for (int i = 0; i < NumDims; ++i) {
|
||||
if (i > 0) {
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
inputStrides[0] = 1;
|
||||
outputStrides[0] = 1;
|
||||
for (int i = 1; i < NumDims; ++i) {
|
||||
inputStrides[i] = inputStrides[i-1] * input_dims[i-1];
|
||||
outputStrides[i] = outputStrides[i-1] * dimensions[i-1];
|
||||
} else {
|
||||
inputStrides[0] = 1;
|
||||
outputStrides[0] = 1;
|
||||
}
|
||||
} else {
|
||||
inputStrides[NumDims - 1] = 1;
|
||||
outputStrides[NumDims - 1] = 1;
|
||||
for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) {
|
||||
inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
|
||||
outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,13 +58,20 @@ template <typename Index, typename InputDims, size_t NumKernelDims> class IndexM
|
||||
array<Index, NumDims> cudaOutputDimensions;
|
||||
array<Index, NumDims> tmp = dimensions;
|
||||
array<Index, NumDims> ordering;
|
||||
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? 0
|
||||
: NumDims - NumKernelDims;
|
||||
for (int i = 0; i < NumKernelDims; ++i) {
|
||||
ordering[i] = indices[i];
|
||||
const Index index = i + offset;
|
||||
ordering[index] = indices[i];
|
||||
tmp[indices[i]] = -1;
|
||||
cudaInputDimensions[i] = input_dims[ordering[i]];
|
||||
cudaOutputDimensions[i] = dimensions[ordering[i]];
|
||||
cudaInputDimensions[index] = input_dims[indices[i]];
|
||||
cudaOutputDimensions[index] = dimensions[indices[i]];
|
||||
}
|
||||
int written = NumKernelDims;
|
||||
|
||||
int written = static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? NumKernelDims
|
||||
: 0;
|
||||
for (int i = 0; i < NumDims; ++i) {
|
||||
if (tmp[i] >= 0) {
|
||||
ordering[written] = i;
|
||||
@ -73,61 +86,123 @@ template <typename Index, typename InputDims, size_t NumKernelDims> class IndexM
|
||||
m_outputStrides[i] = outputStrides[ordering[i]];
|
||||
}
|
||||
|
||||
for (int i = 0; i < NumDims; ++i) {
|
||||
if (i > NumKernelDims) {
|
||||
m_cudaInputStrides[i] = m_cudaInputStrides[i-1] * cudaInputDimensions[i-1];
|
||||
m_cudaOutputStrides[i] = m_cudaOutputStrides[i-1] * cudaOutputDimensions[i-1];
|
||||
} else {
|
||||
m_cudaInputStrides[i] = 1;
|
||||
m_cudaOutputStrides[i] = 1;
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
for (int i = 0; i < NumDims; ++i) {
|
||||
if (i > NumKernelDims) {
|
||||
m_cudaInputStrides[i] =
|
||||
m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1];
|
||||
m_cudaOutputStrides[i] =
|
||||
m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1];
|
||||
} else {
|
||||
m_cudaInputStrides[i] = 1;
|
||||
m_cudaOutputStrides[i] = 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = NumDims - 1; i >= 0; --i) {
|
||||
if (i + 1 < offset) {
|
||||
m_cudaInputStrides[i] =
|
||||
m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1];
|
||||
m_cudaOutputStrides[i] =
|
||||
m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1];
|
||||
} else {
|
||||
m_cudaInputStrides[i] = 1;
|
||||
m_cudaOutputStrides[i] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const {
|
||||
Index inputIndex = 0;
|
||||
for (int d = NumDims - 1; d > NumKernelDims; --d) {
|
||||
const Index idx = p / m_cudaInputStrides[d];
|
||||
inputIndex += idx * m_inputStrides[d];
|
||||
p -= idx * m_cudaInputStrides[d];
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
for (int d = NumDims - 1; d > NumKernelDims; --d) {
|
||||
const Index idx = p / m_cudaInputStrides[d];
|
||||
inputIndex += idx * m_inputStrides[d];
|
||||
p -= idx * m_cudaInputStrides[d];
|
||||
}
|
||||
inputIndex += p * m_inputStrides[NumKernelDims];
|
||||
} else {
|
||||
int limit = 0;
|
||||
if (NumKernelDims < NumDims) {
|
||||
limit = NumDims - NumKernelDims - 1;
|
||||
}
|
||||
for (int d = 0; d < limit; ++d) {
|
||||
const Index idx = p / m_cudaInputStrides[d];
|
||||
inputIndex += idx * m_inputStrides[d];
|
||||
p -= idx * m_cudaInputStrides[d];
|
||||
}
|
||||
inputIndex += p * m_inputStrides[limit];
|
||||
}
|
||||
inputIndex += p * m_inputStrides[NumKernelDims];
|
||||
return inputIndex;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const {
|
||||
Index outputIndex = 0;
|
||||
for (int d = NumDims - 1; d > NumKernelDims; --d) {
|
||||
const Index idx = p / m_cudaOutputStrides[d];
|
||||
outputIndex += idx * m_outputStrides[d];
|
||||
p -= idx * m_cudaOutputStrides[d];
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
for (int d = NumDims - 1; d > NumKernelDims; --d) {
|
||||
const Index idx = p / m_cudaOutputStrides[d];
|
||||
outputIndex += idx * m_outputStrides[d];
|
||||
p -= idx * m_cudaOutputStrides[d];
|
||||
}
|
||||
outputIndex += p * m_outputStrides[NumKernelDims];
|
||||
} else {
|
||||
int limit = 0;
|
||||
if (NumKernelDims < NumDims) {
|
||||
limit = NumDims - NumKernelDims - 1;
|
||||
}
|
||||
for (int d = 0; d < limit; ++d) {
|
||||
const Index idx = p / m_cudaOutputStrides[d];
|
||||
outputIndex += idx * m_outputStrides[d];
|
||||
p -= idx * m_cudaOutputStrides[d];
|
||||
}
|
||||
outputIndex += p * m_outputStrides[limit];
|
||||
}
|
||||
outputIndex += p * m_outputStrides[NumKernelDims];
|
||||
return outputIndex;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const {
|
||||
return i * m_inputStrides[0];
|
||||
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? 0
|
||||
: NumDims - NumKernelDims;
|
||||
return i * m_inputStrides[offset];
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const {
|
||||
return i * m_outputStrides[0];
|
||||
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? 0
|
||||
: NumDims - NumKernelDims;
|
||||
return i * m_outputStrides[offset];
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const {
|
||||
return i * m_inputStrides[0] + j*m_inputStrides[1];
|
||||
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? 0
|
||||
: NumDims - NumKernelDims;
|
||||
return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1];
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const {
|
||||
return i * m_outputStrides[0] + j * m_outputStrides[1];
|
||||
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? 0
|
||||
: NumDims - NumKernelDims;
|
||||
return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1];
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const {
|
||||
return i * m_inputStrides[0] + j*m_inputStrides[1] + k*m_inputStrides[2];
|
||||
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? 0
|
||||
: NumDims - NumKernelDims;
|
||||
return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] +
|
||||
k * m_inputStrides[offset + 2];
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const {
|
||||
return i * m_outputStrides[0] + j*m_outputStrides[1] + k*m_outputStrides[2];
|
||||
const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? 0
|
||||
: NumDims - NumKernelDims;
|
||||
return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] +
|
||||
k * m_outputStrides[offset + 2];
|
||||
}
|
||||
|
||||
private:
|
||||
@ -237,35 +312,61 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
: m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
// Only column major tensors are supported for now.
|
||||
EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
|
||||
const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions();
|
||||
const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
|
||||
|
||||
m_inputStride[0] = 1;
|
||||
for (int i = 1; i < NumDims; ++i) {
|
||||
m_inputStride[i] = m_inputStride[i-1] * input_dims[i-1];
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
m_inputStride[0] = 1;
|
||||
for (int i = 1; i < NumDims; ++i) {
|
||||
m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1];
|
||||
}
|
||||
} else {
|
||||
m_inputStride[NumDims - 1] = 1;
|
||||
for (int i = NumDims - 2; i >= 0; --i) {
|
||||
m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
m_dimensions = m_inputImpl.dimensions();
|
||||
for (int i = 0; i < NumKernelDims; ++i) {
|
||||
const Index index = op.indices()[i];
|
||||
const Index input_dim = input_dims[index];
|
||||
const Index kernel_dim = kernel_dims[i];
|
||||
const Index result_dim = input_dim - kernel_dim + 1;
|
||||
m_dimensions[index] = result_dim;
|
||||
if (i > 0) {
|
||||
m_kernelStride[i] = m_kernelStride[i-1] * kernel_dims[i-1];
|
||||
} else {
|
||||
m_kernelStride[0] = 1;
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
for (int i = 0; i < NumKernelDims; ++i) {
|
||||
const Index index = op.indices()[i];
|
||||
const Index input_dim = input_dims[index];
|
||||
const Index kernel_dim = kernel_dims[i];
|
||||
const Index result_dim = input_dim - kernel_dim + 1;
|
||||
m_dimensions[index] = result_dim;
|
||||
if (i > 0) {
|
||||
m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1];
|
||||
} else {
|
||||
m_kernelStride[0] = 1;
|
||||
}
|
||||
m_indexStride[i] = m_inputStride[index];
|
||||
}
|
||||
m_indexStride[i] = m_inputStride[index];
|
||||
}
|
||||
|
||||
m_outputStride[0] = 1;
|
||||
for (int i = 1; i < NumDims; ++i) {
|
||||
m_outputStride[i] = m_outputStride[i-1] * m_dimensions[i-1];
|
||||
m_outputStride[0] = 1;
|
||||
for (int i = 1; i < NumDims; ++i) {
|
||||
m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1];
|
||||
}
|
||||
} else {
|
||||
for (int i = NumKernelDims - 1; i >= 0; --i) {
|
||||
const Index index = op.indices()[i];
|
||||
const Index input_dim = input_dims[index];
|
||||
const Index kernel_dim = kernel_dims[i];
|
||||
const Index result_dim = input_dim - kernel_dim + 1;
|
||||
m_dimensions[index] = result_dim;
|
||||
if (i < NumKernelDims - 1) {
|
||||
m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1];
|
||||
} else {
|
||||
m_kernelStride[NumKernelDims - 1] = 1;
|
||||
}
|
||||
m_indexStride[i] = m_inputStride[index];
|
||||
}
|
||||
|
||||
m_outputStride[NumDims - 1] = 1;
|
||||
for (int i = NumDims - 2; i >= 0; --i) {
|
||||
m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -310,13 +411,24 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
Index indices[2] = {index, index+PacketSize-1};
|
||||
Index startInputs[2] = {0, 0};
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const Index idx0 = indices[0] / m_outputStride[i];
|
||||
const Index idx1 = indices[1] / m_outputStride[i];
|
||||
startInputs[0] += idx0 * m_inputStride[i];
|
||||
startInputs[1] += idx1 * m_inputStride[i];
|
||||
indices[0] -= idx0 * m_outputStride[i];
|
||||
indices[1] -= idx1 * m_outputStride[i];
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const Index idx0 = indices[0] / m_outputStride[i];
|
||||
const Index idx1 = indices[1] / m_outputStride[i];
|
||||
startInputs[0] += idx0 * m_inputStride[i];
|
||||
startInputs[1] += idx1 * m_inputStride[i];
|
||||
indices[0] -= idx0 * m_outputStride[i];
|
||||
indices[1] -= idx1 * m_outputStride[i];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < NumDims - 1; ++i) {
|
||||
const Index idx0 = indices[0] / m_outputStride[i];
|
||||
const Index idx1 = indices[1] / m_outputStride[i];
|
||||
startInputs[0] += idx0 * m_inputStride[i];
|
||||
startInputs[1] += idx1 * m_inputStride[i];
|
||||
indices[0] -= idx0 * m_outputStride[i];
|
||||
indices[1] -= idx1 * m_outputStride[i];
|
||||
}
|
||||
}
|
||||
startInputs[0] += indices[0];
|
||||
startInputs[1] += indices[1];
|
||||
@ -344,10 +456,18 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
private:
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
|
||||
Index startInput = 0;
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const Index idx = index / m_outputStride[i];
|
||||
startInput += idx * m_inputStride[i];
|
||||
index -= idx * m_outputStride[i];
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const Index idx = index / m_outputStride[i];
|
||||
startInput += idx * m_inputStride[i];
|
||||
index -= idx * m_outputStride[i];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < NumDims - 1; ++i) {
|
||||
const Index idx = index / m_outputStride[i];
|
||||
startInput += idx * m_inputStride[i];
|
||||
index -= idx * m_outputStride[i];
|
||||
}
|
||||
}
|
||||
startInput += index;
|
||||
return startInput;
|
||||
@ -378,7 +498,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void preloadKernel() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() {
|
||||
// Don't make a local copy of the kernel unless we have to (i.e. it's an
|
||||
// expression that needs to be evaluated)
|
||||
const Scalar* in_place = m_kernelImpl.data();
|
||||
@ -431,11 +551,14 @@ struct GetKernelSize<Dynamic> {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
template <typename InputEvaluator, typename Index, typename InputDims, int StaticKernelSize>
|
||||
__global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::IndexMapper<Index, InputDims, 1> indexMapper, const float* __restrict kernel, const int numPlanes, const int numX, const int maxX, const int kernelSize, float* buffer) {
|
||||
template <typename InputEvaluator, typename Index, typename InputDims,
|
||||
int StaticKernelSize>
|
||||
__global__ void EigenConvolutionKernel1D(
|
||||
InputEvaluator eval,
|
||||
const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout>
|
||||
indexMapper,
|
||||
const float* __restrict kernel, const int numPlanes, const int numX,
|
||||
const int maxX, const int kernelSize, float* buffer) {
|
||||
extern __shared__ float s[];
|
||||
|
||||
const int first_x = blockIdx.x * maxX;
|
||||
@ -453,7 +576,7 @@ __global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::In
|
||||
#pragma unroll
|
||||
for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
|
||||
const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x);
|
||||
s[i + plane_kernel_offset] = eval.coeff(tensor_index);
|
||||
s[i + plane_kernel_offset] = eval.coeff(tensor_index);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
@ -476,9 +599,15 @@ __global__ void EigenConvolutionKernel1D(InputEvaluator eval, const internal::In
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename InputEvaluator, typename Index, typename InputDims, int StaticKernelSizeX, int StaticKernelSizeY>
|
||||
__global__ void EigenConvolutionKernel2D(InputEvaluator eval, const internal::IndexMapper<Index, InputDims, 2> indexMapper, const float* __restrict kernel, const int numPlanes, const int numX, const int maxX, const int numY, const int maxY, const int kernelSizeX, const int kernelSizeY, float* buffer) {
|
||||
template <typename InputEvaluator, typename Index, typename InputDims,
|
||||
int StaticKernelSizeX, int StaticKernelSizeY>
|
||||
__global__ void EigenConvolutionKernel2D(
|
||||
InputEvaluator eval,
|
||||
const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout>
|
||||
indexMapper,
|
||||
const float* __restrict kernel, const int numPlanes, const int numX,
|
||||
const int maxX, const int numY, const int maxY, const int kernelSizeX,
|
||||
const int kernelSizeY, float* buffer) {
|
||||
extern __shared__ float s[];
|
||||
|
||||
const int first_x = blockIdx.x * maxX;
|
||||
@ -538,9 +667,15 @@ __global__ void EigenConvolutionKernel2D(InputEvaluator eval, const internal::In
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename InputEvaluator, typename Index, typename InputDims>
|
||||
__global__ void EigenConvolutionKernel3D(InputEvaluator eval, const internal::IndexMapper<Index, InputDims, 3> indexMapper, const float* __restrict kernel, const size_t numPlanes, const size_t numX, const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, const size_t kernelSizeZ, float* buffer) {
|
||||
__global__ void EigenConvolutionKernel3D(
|
||||
InputEvaluator eval,
|
||||
const internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout>
|
||||
indexMapper,
|
||||
const float* __restrict kernel, const size_t numPlanes, const size_t numX,
|
||||
const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ,
|
||||
const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY,
|
||||
const size_t kernelSizeZ, float* buffer) {
|
||||
extern __shared__ float s[];
|
||||
|
||||
// Load inputs to shared memory
|
||||
@ -622,8 +757,6 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
: m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
// Only column major tensors are supported for now.
|
||||
EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
|
||||
const typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions();
|
||||
const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
|
||||
@ -712,10 +845,14 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
|
||||
const int numX = dimensions()[m_indices[0]];
|
||||
const int numP = dimensions().TotalSize() / numX;
|
||||
|
||||
int maxX;
|
||||
dim3 block_size;
|
||||
if (m_indices[0] == 0) {
|
||||
|
||||
const int single_stride_dim =
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor)
|
||||
? 0
|
||||
: m_inputImpl.dimensions().rank() - 1;
|
||||
if (m_indices[0] == single_stride_dim) {
|
||||
// Maximum the reuse
|
||||
const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32;
|
||||
maxX = (std::min<int>)(inner_dim, numX);
|
||||
@ -747,7 +884,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
|
||||
const array<Index, 1> indices(m_indices[0]);
|
||||
const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
|
||||
internal::IndexMapper<Index, InputDims, 1> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
|
||||
internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(
|
||||
m_inputImpl.dimensions(), kernel_dims, indices);
|
||||
switch(kernel_size) {
|
||||
case 4: {
|
||||
LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data);
|
||||
@ -765,11 +903,15 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
}
|
||||
|
||||
case 2: {
|
||||
const int kernel_size_x = m_kernelImpl.dimensions()[0];
|
||||
const int kernel_size_y = m_kernelImpl.dimensions()[1];
|
||||
const int idxX =
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1;
|
||||
const int idxY =
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0;
|
||||
const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
|
||||
const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
|
||||
|
||||
const int numX = dimensions()[m_indices[0]];
|
||||
const int numY = dimensions()[m_indices[1]];
|
||||
const int numX = dimensions()[m_indices[idxX]];
|
||||
const int numY = dimensions()[m_indices[idxY]];
|
||||
const int numP = dimensions().TotalSize() / (numX*numY);
|
||||
|
||||
const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x));
|
||||
@ -798,9 +940,11 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
|
||||
//cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
|
||||
|
||||
const array<Index, 2> indices(m_indices[0], m_indices[1]);
|
||||
const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[0], m_kernelImpl.dimensions()[1]);
|
||||
internal::IndexMapper<Index, InputDims, 2> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
|
||||
const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
|
||||
const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX],
|
||||
m_kernelImpl.dimensions()[idxY]);
|
||||
internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(
|
||||
m_inputImpl.dimensions(), kernel_dims, indices);
|
||||
switch (kernel_size_x) {
|
||||
case 4: {
|
||||
switch (kernel_size_y) {
|
||||
@ -837,13 +981,20 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
}
|
||||
|
||||
case 3: {
|
||||
const int kernel_size_x = m_kernelImpl.dimensions()[0];
|
||||
const int kernel_size_y = m_kernelImpl.dimensions()[1];
|
||||
const int kernel_size_z = m_kernelImpl.dimensions()[2];
|
||||
const int idxX =
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2;
|
||||
const int idxY =
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1;
|
||||
const int idxZ =
|
||||
static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0;
|
||||
|
||||
const int numX = dimensions()[m_indices[0]];
|
||||
const int numY = dimensions()[m_indices[1]];
|
||||
const int numZ = dimensions()[m_indices[2]];
|
||||
const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
|
||||
const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
|
||||
const int kernel_size_z = m_kernelImpl.dimensions()[idxZ];
|
||||
|
||||
const int numX = dimensions()[m_indices[idxX]];
|
||||
const int numY = dimensions()[m_indices[idxY]];
|
||||
const int numZ = dimensions()[m_indices[idxZ]];
|
||||
const int numP = dimensions().TotalSize() / (numX*numY*numZ);
|
||||
|
||||
const int maxX = (std::min<int>)(128, (std::min<int>)(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX));
|
||||
@ -860,16 +1011,20 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
assert(shared_mem <= maxSharedMem);
|
||||
|
||||
//cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
|
||||
const array<Index, 3> indices(m_indices[0], m_indices[1], m_indices[2]);
|
||||
const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[0], m_kernelImpl.dimensions()[1], m_kernelImpl.dimensions()[2]);
|
||||
internal::IndexMapper<Index, InputDims, 3> indexMapper(m_inputImpl.dimensions(), kernel_dims, indices);
|
||||
const array<Index, 3> indices(m_indices[idxX], m_indices[idxY],
|
||||
m_indices[idxZ]);
|
||||
const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX],
|
||||
m_kernelImpl.dimensions()[idxY],
|
||||
m_kernelImpl.dimensions()[idxZ]);
|
||||
internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(
|
||||
m_inputImpl.dimensions(), kernel_dims, indices);
|
||||
|
||||
LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data);
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
assert(false && "not supported yet");
|
||||
EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -21,8 +21,7 @@ namespace Eigen {
|
||||
* Example:
|
||||
* C.device(EIGEN_GPU) = A + B;
|
||||
*
|
||||
* Todo: thread pools.
|
||||
* Todo: operator +=, -=, *= and so on.
|
||||
* Todo: operator *= and /=.
|
||||
*/
|
||||
|
||||
template <typename ExpressionType, typename DeviceType> class TensorDevice {
|
||||
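A minimal usage sketch of the device interface described in the comment above, assuming the single-threaded DefaultDevice backend; the tensor names and sizes are illustrative only, and operator-= is the overload introduced by this patch alongside the existing operator= and operator+=:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> A(4, 4), B(4, 4), C(4, 4);
      A.setRandom();
      B.setRandom();
      C.setZero();

      Eigen::DefaultDevice device;   // plain single-threaded CPU device
      C.device(device) = A + B;      // evaluate the expression on the chosen device
      C.device(device) += A;         // accumulate into C
      C.device(device) -= B;         // subtraction form added by this patch
      return 0;
    }
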
@ -33,8 +32,7 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
|
||||
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
||||
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
||||
Assign assign(m_expression, other);
|
||||
static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
|
||||
internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
|
||||
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -45,8 +43,18 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
|
||||
Sum sum(m_expression, other);
|
||||
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
||||
Assign assign(m_expression, sum);
|
||||
static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
|
||||
internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
|
||||
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
|
||||
typedef typename OtherDerived::Scalar Scalar;
|
||||
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
|
||||
Difference difference(m_expression, other);
|
||||
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
||||
Assign assign(m_expression, difference);
|
||||
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -65,8 +73,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
|
||||
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
||||
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
||||
Assign assign(m_expression, other);
|
||||
static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
|
||||
internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
|
||||
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -77,8 +84,18 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
|
||||
Sum sum(m_expression, other);
|
||||
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
||||
Assign assign(m_expression, sum);
|
||||
static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
|
||||
internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
|
||||
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
|
||||
typedef typename OtherDerived::Scalar Scalar;
|
||||
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
|
||||
Difference difference(m_expression, other);
|
||||
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
||||
Assign assign(m_expression, difference);
|
||||
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -99,7 +116,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
|
||||
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
||||
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
||||
Assign assign(m_expression, other);
|
||||
internal::TensorExecutor<const Assign, GpuDevice, false>::run(assign, m_device);
|
||||
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -110,13 +127,24 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
|
||||
Sum sum(m_expression, other);
|
||||
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
||||
Assign assign(m_expression, sum);
|
||||
internal::TensorExecutor<const Assign, GpuDevice, false>::run(assign, m_device);
|
||||
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
|
||||
typedef typename OtherDerived::Scalar Scalar;
|
||||
typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
|
||||
Difference difference(m_expression, other);
|
||||
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
||||
Assign assign(m_expression, difference);
|
||||
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
|
||||
return *this;
|
||||
}
|
||||
|
||||
protected:
|
||||
const GpuDevice& m_device;
|
||||
ExpressionType m_expression;
|
||||
ExpressionType& m_expression;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -145,39 +145,39 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
|
||||
|
||||
Sizes() { }
|
||||
template <typename DenseIndex>
|
||||
explicit Sizes(const array<DenseIndex, Base::count>& indices) {
|
||||
explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
|
||||
// todo: add assertion
|
||||
}
|
||||
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template <typename... DenseIndex> Sizes(DenseIndex... indices) { }
|
||||
explicit Sizes(std::initializer_list<std::size_t> l) {
|
||||
template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
|
||||
explicit Sizes(std::initializer_list<std::size_t>) {
|
||||
// todo: add assertion
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0) {
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) {
|
||||
}
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1) {
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex) {
|
||||
}
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex) {
|
||||
}
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
|
||||
}
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
|
||||
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename T> Sizes& operator = (const T& other) {
|
||||
template <typename T> Sizes& operator = (const T&) {
|
||||
// to do: check the size of other
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
|
||||
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count - 1, false>::run(indices, *static_cast<const Base*>(this);
|
||||
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count - 1, false>::run(indices, *static_cast<const Base*>(this));
|
||||
}
|
||||
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
|
||||
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count - 1, true>::run(indices, *static_cast<const Base*>(this);
|
||||
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count - 1, true>::run(indices, *static_cast<const Base*>(this));
|
||||
}
|
||||
};
|
||||
|
||||
@ -343,7 +343,7 @@ template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::s
|
||||
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
|
||||
static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
|
||||
};
|
||||
template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>& a) {
|
||||
template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) {
|
||||
return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value;
|
||||
};
|
||||
|
||||
|
@ -352,11 +352,12 @@ template<typename IfArgType, typename ThenArgType, typename ElseArgType, typenam
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
typedef typename XprType::Scalar Scalar;

enum {
IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess/* &
TensorEvaluator<IfArgType>::PacketAccess*/,
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
internal::packet_traits<Scalar>::HasBlend,
Layout = TensorEvaluator<IfArgType, Device>::Layout,
CoordAccess = false, // to be implemented
};
@ -373,7 +374,6 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
}

typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename internal::traits<XprType>::Packet PacketReturnType;
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
@ -403,7 +403,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
template<int LoadMode>
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
{
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
internal::Selector<PacketSize> select;
for (Index i = 0; i < PacketSize; ++i) {
select.select[i] = m_condImpl.coeff(index+i);

@ -77,7 +77,7 @@ template <typename T> struct MeanReducer
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * packet_traits<Packet>::size);
return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
}

protected:

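The MeanReducer hunk above swaps packet_traits<Packet>::size for unpacket_traits<Packet>::size: packet_traits is keyed by the scalar type, whereas unpacket_traits is keyed by the SIMD packet type that finalizeBoth actually receives. The snippet below is only an illustrative sketch, not part of the commit; it assumes a vectorized build where packet_traits<float>::type is a multi-lane packet (e.g. Packet4f with SSE), and both traits are Eigen internals that may change between versions.

#include <Eigen/Core>
#include <iostream>

int main() {
  using namespace Eigen::internal;
  // Packet type associated with float on this target.
  typedef packet_traits<float>::type Packet;
  // Packet width deduced from the scalar type...
  std::cout << "packet_traits<float>::size    = " << packet_traits<float>::size << "\n";
  // ...and the same width deduced from the packet type itself, which is all
  // a reducer holding only a Packet value can query.
  std::cout << "unpacket_traits<Packet>::size = " << unpacket_traits<Packet>::size << "\n";
  return 0;
}
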
@ -54,7 +54,7 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_> >
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | LvalueBit,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit),
};
};

@ -69,7 +69,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_> >
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum {
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | LvalueBit,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit),
};
};

@ -86,7 +86,7 @@ struct traits<TensorMap<PlainObjectType, Options_> >
static const int Layout = BaseTraits::Layout;
enum {
Options = Options_,
Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
};
};

@ -102,7 +102,7 @@ struct traits<TensorRef<PlainObjectType> >
static const int Layout = BaseTraits::Layout;
enum {
Options = BaseTraits::Options,
Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
Flags = (BaseTraits::Flags & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
};
};

@ -361,7 +361,6 @@ struct evaluator<DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> >
: evaluator_base<DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> >
{
typedef _Scalar Scalar;
typedef _StorageIndex Index;
typedef DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> SparseMatrixType;
typedef typename SparseMatrixType::InnerIterator InnerIterator;
typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator;
@ -378,6 +377,8 @@ struct evaluator<DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> >
operator const SparseMatrixType&() const { return *m_matrix; }

Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); }

Index nonZerosEstimate() const { return m_matrix->nonZeros(); }

const SparseMatrixType *m_matrix;
};

@ -18,7 +18,7 @@ namespace Eigen {
namespace internal
{
template <typename Scalar>
inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, Scalar& value)
inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, Scalar& value)
{
line >> i >> j >> value;
i--;
@ -31,7 +31,7 @@ namespace internal
return false;
}
template <typename Scalar>
inline bool GetMarketLine (std::stringstream& line, int& M, int& N, int& i, int& j, std::complex<Scalar>& value)
inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, std::complex<Scalar>& value)
{
Scalar valR, valI;
line >> i >> j >> valR >> valI;

@ -340,11 +340,9 @@ static void test_chip_as_lvalue()
}
}


template<int DataLayout>
static void test_chip_raw_data()
static void test_chip_raw_data_col_major()
{
Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
Tensor<float, 5, ColMajor> tensor(2,3,5,7,11);
tensor.setRandom();

typedef TensorEvaluator<decltype(tensor.template chip<4>(3)), DefaultDevice> Evaluator4;
@ -353,12 +351,7 @@ static void test_chip_raw_data()
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
int chip_index;
if (DataLayout == ColMajor) {
chip_index = i + 2 * (j + 3 * (k + 5 * l));
} else {
chip_index = 11 * (l + 7 * (k + 5 * (j + 3 * i)));
}
int chip_index = i + 2 * (j + 3 * (k + 5 * l));
VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(i,j,k,l,3));
}
}
@ -382,6 +375,41 @@ static void test_chip_raw_data()
VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
}

static void test_chip_raw_data_row_major()
{
Tensor<float, 5, RowMajor> tensor(11,7,5,3,2);
tensor.setRandom();

typedef TensorEvaluator<decltype(tensor.template chip<0>(3)), DefaultDevice> Evaluator0;
auto chip = Evaluator0(tensor.template chip<0>(3), DefaultDevice());
for (int i = 0; i < 7; ++i) {
for (int j = 0; j < 5; ++j) {
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 2; ++l) {
int chip_index = l + 2 * (k + 3 * (j + 5 * i));
VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(3,i,j,k,l));
}
}
}
}

typedef TensorEvaluator<decltype(tensor.template chip<1>(0)), DefaultDevice> Evaluator1;
auto chip1 = Evaluator1(tensor.template chip<1>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0));

typedef TensorEvaluator<decltype(tensor.template chip<2>(0)), DefaultDevice> Evaluator2;
auto chip2 = Evaluator2(tensor.template chip<2>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0));

typedef TensorEvaluator<decltype(tensor.template chip<3>(0)), DefaultDevice> Evaluator3;
auto chip3 = Evaluator3(tensor.template chip<3>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));

typedef TensorEvaluator<decltype(tensor.template chip<4>(0)), DefaultDevice> Evaluator4;
auto chip4 = Evaluator4(tensor.template chip<4>(0), DefaultDevice());
VERIFY_IS_EQUAL(chip4.data(), static_cast<float*>(0));
}

void test_cxx11_tensor_chipping()
{
CALL_SUBTEST(test_simple_chip<ColMajor>());
@ -392,6 +420,6 @@ void test_cxx11_tensor_chipping()
CALL_SUBTEST(test_chip_in_expr<RowMajor>());
CALL_SUBTEST(test_chip_as_lvalue<ColMajor>());
CALL_SUBTEST(test_chip_as_lvalue<RowMajor>());
CALL_SUBTEST(test_chip_raw_data<ColMajor>());
CALL_SUBTEST(test_chip_raw_data<RowMajor>());
CALL_SUBTEST(test_chip_raw_data_col_major());
CALL_SUBTEST(test_chip_raw_data_row_major());
}

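The chip_index expression kept in the col-major test above is plain column-major linearization of the first four dimensions of the 2x3x5x7x11 tensor. As a reference point only (a standalone sketch, not part of the commit), the formula agrees with an explicit stride computation:

#include <cassert>

int main() {
  const int dims[4] = {2, 3, 5, 7};  // dimensions of the chipped 4-d view
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
      for (int k = 0; k < 5; ++k)
        for (int l = 0; l < 7; ++l) {
          // Column-major storage: the first index varies fastest.
          const int via_strides = i + dims[0] * (j + dims[1] * (k + dims[2] * l));
          const int via_formula = i + 2 * (j + 3 * (k + 5 * l));
          assert(via_strides == via_formula);
        }
  return 0;
}
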
@ -13,8 +13,6 @@
using Eigen::Tensor;



static void test_simple_assign()
{
Tensor<int, 3> random(2,3,7);
@ -33,7 +31,32 @@ static void test_simple_assign()
}
}


static void test_assign_of_const_tensor()
{
Tensor<int, 3> random(2,3,7);
random.setRandom();

TensorMap<Tensor<const int, 3> > constant1(random.data(), 2, 3, 7);
TensorMap<const Tensor<int, 3> > constant2(random.data(), 2, 3, 7);
const TensorMap<Tensor<int, 3> > constant3(random.data(), 2, 3, 7);

Tensor<int, 2> result1 = constant1.chip(0, 2);
Tensor<int, 2> result2 = constant2.chip(0, 2);
Tensor<int, 2> result3 = constant3.chip(0, 2);

for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL((result1(i,j)), random(i,j,0));
VERIFY_IS_EQUAL((result2(i,j)), random(i,j,0));
VERIFY_IS_EQUAL((result3(i,j)), random(i,j,0));
}
}
}


void test_cxx11_tensor_const()
{
CALL_SUBTEST(test_simple_assign());
CALL_SUBTEST(test_assign_of_const_tensor());
}

@ -14,15 +14,16 @@
using Eigen::Tensor;
using Eigen::DefaultDevice;

template <int DataLayout>
static void test_evals()
{
Tensor<float, 2> input(3, 3);
Tensor<float, 1> kernel(2);
Tensor<float, 2, DataLayout> input(3, 3);
Tensor<float, 1, DataLayout> kernel(2);

input.setRandom();
kernel.setRandom();

Tensor<float, 2> result(2,3);
Tensor<float, 2, DataLayout> result(2,3);
result.setZero();
Eigen::array<Tensor<float, 2>::Index, 1> dims3({0});

@ -41,15 +42,15 @@ static void test_evals()
VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5
}


template <int DataLayout>
static void test_expr()
{
Tensor<float, 2> input(3, 3);
Tensor<float, 2> kernel(2, 2);
Tensor<float, 2, DataLayout> input(3, 3);
Tensor<float, 2, DataLayout> kernel(2, 2);
input.setRandom();
kernel.setRandom();

Tensor<float, 2> result(2,2);
Tensor<float, 2, DataLayout> result(2,2);
Eigen::array<ptrdiff_t, 2> dims({0, 1});
result = input.convolve(kernel, dims);

@ -63,10 +64,10 @@ static void test_expr()
input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
}

template <int DataLayout>
static void test_modes() {
Tensor<float, 1> input(3);
Tensor<float, 1> kernel(3);
Tensor<float, 1, DataLayout> input(3);
Tensor<float, 1, DataLayout> kernel(3);
input(0) = 1.0f;
input(1) = 2.0f;
input(2) = 3.0f;
@ -74,13 +75,13 @@ static void test_modes() {
kernel(1) = 1.0f;
kernel(2) = 0.0f;

const Eigen::array<ptrdiff_t, 1> dims{{0}};
const Eigen::array<ptrdiff_t, 1> dims({0});
Eigen::array<std::pair<ptrdiff_t, ptrdiff_t>, 1> padding;

// Emulate VALID mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(0, 0);
Tensor<float, 1> valid(1);
Tensor<float, 1, DataLayout> valid(1);
valid = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(valid.dimension(0), 1);
VERIFY_IS_APPROX(valid(0), 2.5f);
@ -88,7 +89,7 @@ static void test_modes() {
// Emulate SAME mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(1, 1);
Tensor<float, 1> same(3);
Tensor<float, 1, DataLayout> same(3);
same = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(same.dimension(0), 3);
VERIFY_IS_APPROX(same(0), 1.0f);
@ -98,7 +99,7 @@ static void test_modes() {
// Emulate FULL mode (as defined in
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
padding[0] = std::make_pair(2, 2);
Tensor<float, 1> full(5);
Tensor<float, 1, DataLayout> full(5);
full = input.pad(padding).convolve(kernel, dims);
VERIFY_IS_EQUAL(full.dimension(0), 5);
VERIFY_IS_APPROX(full(0), 0.0f);
@ -108,18 +109,18 @@ static void test_modes() {
VERIFY_IS_APPROX(full(4), 1.5f);
}

template <int DataLayout>
static void test_strides() {
Tensor<float, 1> input(13);
Tensor<float, 1> kernel(3);
Tensor<float, 1, DataLayout> input(13);
Tensor<float, 1, DataLayout> kernel(3);
input.setRandom();
kernel.setRandom();

const Eigen::array<ptrdiff_t, 1> dims{{0}};
const Eigen::array<ptrdiff_t, 1> stride_of_3{{3}};
const Eigen::array<ptrdiff_t, 1> stride_of_2{{2}};
const Eigen::array<ptrdiff_t, 1> dims({0});
const Eigen::array<ptrdiff_t, 1> stride_of_3({3});
const Eigen::array<ptrdiff_t, 1> stride_of_2({2});

Tensor<float, 1> result;
Tensor<float, 1, DataLayout> result;
result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2);

VERIFY_IS_EQUAL(result.dimension(0), 2);
@ -129,13 +130,14 @@ static void test_strides() {
input(12)*kernel(2)));
}



void test_cxx11_tensor_convolution()
{
CALL_SUBTEST(test_evals());
CALL_SUBTEST(test_expr());
CALL_SUBTEST(test_modes());
CALL_SUBTEST(test_strides());
CALL_SUBTEST(test_evals<ColMajor>());
CALL_SUBTEST(test_evals<RowMajor>());
CALL_SUBTEST(test_expr<ColMajor>());
CALL_SUBTEST(test_expr<RowMajor>());
CALL_SUBTEST(test_modes<ColMajor>());
CALL_SUBTEST(test_modes<RowMajor>());
CALL_SUBTEST(test_strides<ColMajor>());
CALL_SUBTEST(test_strides<RowMajor>());
}

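The test_modes hunks above keep emulating numpy's VALID, SAME and FULL convolution modes through explicit padding. As a sketch of the arithmetic (not part of the commit): for an input of length n, a kernel of length k and pre/post padding (p0, p1), the convolution produces n + p0 + p1 - k + 1 outputs, which for n = 3, k = 3 gives the 1, 3 and 5 elements checked in the test.

#include <iostream>

int main() {
  const int n = 3, k = 3;
  const int pad[3][2] = {{0, 0}, {1, 1}, {2, 2}};  // VALID, SAME, FULL paddings
  for (int m = 0; m < 3; ++m) {
    const int out_len = n + pad[m][0] + pad[m][1] - k + 1;
    std::cout << out_len << "\n";  // prints 1, 3, 5
  }
  return 0;
}
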
@ -117,11 +117,10 @@ void test_cuda_elementwise()
}
}


void test_cuda_reduction()
{
Tensor<float, 4> in1(Eigen::array<int, 4>(72,53,97,113));
Tensor<float, 2> out(Eigen::array<int, 2>(72,97));
Tensor<float, 4> in1(72,53,97,113);
Tensor<float, 2> out(72,97);
in1.setRandom();

std::size_t in1_bytes = in1.size() * sizeof(float);
@ -138,8 +137,8 @@ void test_cuda_reduction()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, Eigen::array<int, 4>(72,53,97,113));
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, Eigen::array<int, 2>(72,97));
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113);
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);

array<int, 2> reduction_axis;
reduction_axis[0] = 1;
@ -156,10 +155,10 @@ void test_cuda_reduction()
for (int k = 0; k < 53; ++k) {
for (int l = 0; l < 113; ++l) {
expected =
std::max<float>(expected, in1(Eigen::array<int, 4>(i, k, j, l)));
std::max<float>(expected, in1(i, k, j, l));
}
}
VERIFY_IS_APPROX(out(Eigen::array<int, 2>(i,j)), expected);
VERIFY_IS_APPROX(out(i,j), expected);
}
}
}
@ -170,7 +169,7 @@ static void test_cuda_contraction()
// with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, which is the number of threads in blocks on
// a 15 SM GK110 GPU
Tensor<float, 4, DataLayout> t_left(Eigen::array<int, 4>(6, 50, 3, 31));
Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31);
Tensor<float, 5, DataLayout> t_right(Eigen::array<int, 5>(3, 31, 7, 20, 1));
Tensor<float, 5, DataLayout> t_result(Eigen::array<int, 5>(6, 50, 7, 20, 1));

@ -196,12 +195,9 @@ static void test_cuda_contraction()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> >
gpu_t_left(d_t_left, Eigen::array<int, 4>(6, 50, 3, 31));
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> >
gpu_t_right(d_t_right, Eigen::array<int, 5>(3, 31, 7, 20, 1));
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> >
gpu_t_result(d_t_result, Eigen::array<int, 5>(6, 50, 7, 20, 1));
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_t_left(d_t_left, 6, 50, 3, 31);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_right(d_t_right, 3, 31, 7, 20, 1);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_result(d_t_result, 6, 50, 7, 20, 1);

typedef Eigen::Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> > MapXf;
MapXf m_left(t_left.data(), 300, 93);
@ -226,11 +222,12 @@ static void test_cuda_contraction()
}
}

template<int DataLayout>
static void test_cuda_convolution_1d()
{
Tensor<float, 4> input(Eigen::array<int, 4>(74,37,11,137));
Tensor<float, 1> kernel(Eigen::array<int, 1>(4));
Tensor<float, 4> out(Eigen::array<int, 4>(74,34,11,137));
Tensor<float, 4, DataLayout> input(74,37,11,137);
Tensor<float, 1, DataLayout> kernel(4);
Tensor<float, 4, DataLayout> out(74,34,11,137);
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();

@ -252,9 +249,9 @@ static void test_cuda_convolution_1d()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_input(d_input, Eigen::array<int, 4>(74,37,11,137));
Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_kernel(d_kernel, Eigen::array<int, 1>(4));
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_out(d_out, Eigen::array<int, 4>(74,34,11,137));
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input, 74,37,11,137);
Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137);

Eigen::array<int, 1> dims(1);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
@ -266,11 +263,9 @@ static void test_cuda_convolution_1d()
for (int j = 0; j < 34; ++j) {
for (int k = 0; k < 11; ++k) {
for (int l = 0; l < 137; ++l) {
const float result = out(Eigen::array<int, 4>(i,j,k,l));
const float expected = input(Eigen::array<int, 4>(i,j+0,k,l)) * kernel(Eigen::array<int, 1>(0)) +
input(Eigen::array<int, 4>(i,j+1,k,l)) * kernel(Eigen::array<int, 1>(1)) +
input(Eigen::array<int, 4>(i,j+2,k,l)) * kernel(Eigen::array<int, 1>(2)) +
input(Eigen::array<int, 4>(i,j+3,k,l)) * kernel(Eigen::array<int, 1>(3));
const float result = out(i,j,k,l);
const float expected = input(i,j+0,k,l) * kernel(0) + input(i,j+1,k,l) * kernel(1) +
input(i,j+2,k,l) * kernel(2) + input(i,j+3,k,l) * kernel(3);
VERIFY_IS_APPROX(result, expected);
}
}
@ -278,12 +273,11 @@ static void test_cuda_convolution_1d()
}
}

static void test_cuda_convolution_2d()
static void test_cuda_convolution_inner_dim_col_major_1d()
{
Tensor<float, 4> input(Eigen::array<int, 4>(74,37,11,137));
Tensor<float, 2> kernel(Eigen::array<int, 2>(3,4));
Tensor<float, 4> out(Eigen::array<int, 4>(74,35,8,137));
Tensor<float, 4, ColMajor> input(74,9,11,7);
Tensor<float, 1, ColMajor> kernel(4);
Tensor<float, 4, ColMajor> out(71,9,11,7);
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();

@ -305,9 +299,110 @@ static void test_cuda_convolution_2d()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_input(d_input, Eigen::array<int, 4>(74,37,11,137));
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_kernel(d_kernel, Eigen::array<int, 2>(3,4));
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_out(d_out, Eigen::array<int, 4>(74,35,8,137));
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_input(d_input,74,9,11,7);
Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4);
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7);

Eigen::array<int, 1> dims(0);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);

assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

for (int i = 0; i < 71; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 11; ++k) {
for (int l = 0; l < 7; ++l) {
const float result = out(i,j,k,l);
const float expected = input(i+0,j,k,l) * kernel(0) + input(i+1,j,k,l) * kernel(1) +
input(i+2,j,k,l) * kernel(2) + input(i+3,j,k,l) * kernel(3);
VERIFY_IS_APPROX(result, expected);
}
}
}
}
}

static void test_cuda_convolution_inner_dim_row_major_1d()
{
Tensor<float, 4, RowMajor> input(7,9,11,74);
Tensor<float, 1, RowMajor> kernel(4);
Tensor<float, 4, RowMajor> out(7,9,11,71);
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();

std::size_t input_bytes = input.size() * sizeof(float);
std::size_t kernel_bytes = kernel.size() * sizeof(float);
std::size_t out_bytes = out.size() * sizeof(float);

float* d_input;
float* d_kernel;
float* d_out;
cudaMalloc((void**)(&d_input), input_bytes);
cudaMalloc((void**)(&d_kernel), kernel_bytes);
cudaMalloc((void**)(&d_out), out_bytes);

cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);

cudaStream_t stream;
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_input(d_input, 7,9,11,74);
Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4);
Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71);

Eigen::array<int, 1> dims(3);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);

assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

for (int i = 0; i < 7; ++i) {
for (int j = 0; j < 9; ++j) {
for (int k = 0; k < 11; ++k) {
for (int l = 0; l < 71; ++l) {
const float result = out(i,j,k,l);
const float expected = input(i,j,k,l+0) * kernel(0) + input(i,j,k,l+1) * kernel(1) +
input(i,j,k,l+2) * kernel(2) + input(i,j,k,l+3) * kernel(3);
VERIFY_IS_APPROX(result, expected);
}
}
}
}
}

template<int DataLayout>
static void test_cuda_convolution_2d()
{
Tensor<float, 4, DataLayout> input(74,37,11,137);
Tensor<float, 2, DataLayout> kernel(3,4);
Tensor<float, 4, DataLayout> out(74,35,8,137);
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();

std::size_t input_bytes = input.size() * sizeof(float);
std::size_t kernel_bytes = kernel.size() * sizeof(float);
std::size_t out_bytes = out.size() * sizeof(float);

float* d_input;
float* d_kernel;
float* d_out;
cudaMalloc((void**)(&d_input), input_bytes);
cudaMalloc((void**)(&d_kernel), kernel_bytes);
cudaMalloc((void**)(&d_out), out_bytes);

cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);

cudaStream_t stream;
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input,74,37,11,137);
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137);

Eigen::array<int, 2> dims(1,2);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
@ -319,32 +414,32 @@ static void test_cuda_convolution_2d()
for (int j = 0; j < 35; ++j) {
for (int k = 0; k < 8; ++k) {
for (int l = 0; l < 137; ++l) {
const float result = out(Eigen::array<int, 4>(i,j,k,l));
const float expected = input(Eigen::array<int, 4>(i,j+0,k+0,l)) * kernel(Eigen::array<int, 2>(0,0)) +
input(Eigen::array<int, 4>(i,j+1,k+0,l)) * kernel(Eigen::array<int, 2>(1,0)) +
input(Eigen::array<int, 4>(i,j+2,k+0,l)) * kernel(Eigen::array<int, 2>(2,0)) +
input(Eigen::array<int, 4>(i,j+0,k+1,l)) * kernel(Eigen::array<int, 2>(0,1)) +
input(Eigen::array<int, 4>(i,j+1,k+1,l)) * kernel(Eigen::array<int, 2>(1,1)) +
input(Eigen::array<int, 4>(i,j+2,k+1,l)) * kernel(Eigen::array<int, 2>(2,1)) +
input(Eigen::array<int, 4>(i,j+0,k+2,l)) * kernel(Eigen::array<int, 2>(0,2)) +
input(Eigen::array<int, 4>(i,j+1,k+2,l)) * kernel(Eigen::array<int, 2>(1,2)) +
input(Eigen::array<int, 4>(i,j+2,k+2,l)) * kernel(Eigen::array<int, 2>(2,2)) +
input(Eigen::array<int, 4>(i,j+0,k+3,l)) * kernel(Eigen::array<int, 2>(0,3)) +
input(Eigen::array<int, 4>(i,j+1,k+3,l)) * kernel(Eigen::array<int, 2>(1,3)) +
input(Eigen::array<int, 4>(i,j+2,k+3,l)) * kernel(Eigen::array<int, 2>(2,3));
VERIFY_IS_APPROX(result, expected);
const float result = out(i,j,k,l);
const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
input(i,j+1,k+0,l) * kernel(1,0) +
input(i,j+2,k+0,l) * kernel(2,0) +
input(i,j+0,k+1,l) * kernel(0,1) +
input(i,j+1,k+1,l) * kernel(1,1) +
input(i,j+2,k+1,l) * kernel(2,1) +
input(i,j+0,k+2,l) * kernel(0,2) +
input(i,j+1,k+2,l) * kernel(1,2) +
input(i,j+2,k+2,l) * kernel(2,2) +
input(i,j+0,k+3,l) * kernel(0,3) +
input(i,j+1,k+3,l) * kernel(1,3) +
input(i,j+2,k+3,l) * kernel(2,3);
VERIFY_IS_APPROX(result, expected);
}
}
}
}
}

template<int DataLayout>
static void test_cuda_convolution_3d()
{
Tensor<float, 5> input(Eigen::array<int, 5>(74,37,11,137,17));
Tensor<float, 3> kernel(Eigen::array<int, 3>(3,4,2));
Tensor<float, 5> out(Eigen::array<int, 5>(74,35,8,136,17));
Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
Tensor<float, 3, DataLayout> kernel(3,4,2);
Tensor<float, 5, DataLayout> out(Eigen::array<int, 5>(74,35,8,136,17));
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();

@ -366,9 +461,9 @@ static void test_cuda_convolution_3d()
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<float, 5> > gpu_input(d_input, Eigen::array<int, 5>(74,37,11,137,17));
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_kernel(d_kernel, Eigen::array<int, 3>(3,4,2));
Eigen::TensorMap<Eigen::Tensor<float, 5> > gpu_out(d_out, Eigen::array<int, 5>(74,35,8,136,17));
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_input(d_input,74,37,11,137,17);
Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17);

Eigen::array<int, 3> dims(1,2,3);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
@ -381,31 +476,31 @@ static void test_cuda_convolution_3d()
for (int k = 0; k < 8; ++k) {
for (int l = 0; l < 136; ++l) {
for (int m = 0; m < 17; ++m) {
const float result = out(Eigen::array<int, 5>(i,j,k,l,m));
const float expected = input(Eigen::array<int, 5>(i,j+0,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(0,0,0)) +
input(Eigen::array<int, 5>(i,j+1,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(1,0,0)) +
input(Eigen::array<int, 5>(i,j+2,k+0,l+0,m)) * kernel(Eigen::array<int, 3>(2,0,0)) +
input(Eigen::array<int, 5>(i,j+0,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(0,1,0)) +
input(Eigen::array<int, 5>(i,j+1,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(1,1,0)) +
input(Eigen::array<int, 5>(i,j+2,k+1,l+0,m)) * kernel(Eigen::array<int, 3>(2,1,0)) +
input(Eigen::array<int, 5>(i,j+0,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(0,2,0)) +
input(Eigen::array<int, 5>(i,j+1,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(1,2,0)) +
input(Eigen::array<int, 5>(i,j+2,k+2,l+0,m)) * kernel(Eigen::array<int, 3>(2,2,0)) +
input(Eigen::array<int, 5>(i,j+0,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(0,3,0)) +
input(Eigen::array<int, 5>(i,j+1,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(1,3,0)) +
input(Eigen::array<int, 5>(i,j+2,k+3,l+0,m)) * kernel(Eigen::array<int, 3>(2,3,0)) +
input(Eigen::array<int, 5>(i,j+0,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(0,0,1)) +
input(Eigen::array<int, 5>(i,j+1,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(1,0,1)) +
input(Eigen::array<int, 5>(i,j+2,k+0,l+1,m)) * kernel(Eigen::array<int, 3>(2,0,1)) +
input(Eigen::array<int, 5>(i,j+0,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(0,1,1)) +
input(Eigen::array<int, 5>(i,j+1,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(1,1,1)) +
input(Eigen::array<int, 5>(i,j+2,k+1,l+1,m)) * kernel(Eigen::array<int, 3>(2,1,1)) +
input(Eigen::array<int, 5>(i,j+0,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(0,2,1)) +
input(Eigen::array<int, 5>(i,j+1,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(1,2,1)) +
input(Eigen::array<int, 5>(i,j+2,k+2,l+1,m)) * kernel(Eigen::array<int, 3>(2,2,1)) +
input(Eigen::array<int, 5>(i,j+0,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(0,3,1)) +
input(Eigen::array<int, 5>(i,j+1,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(1,3,1)) +
input(Eigen::array<int, 5>(i,j+2,k+3,l+1,m)) * kernel(Eigen::array<int, 3>(2,3,1));
const float result = out(i,j,k,l,m);
const float expected = input(i,j+0,k+0,l+0,m) * kernel(0,0,0) +
input(i,j+1,k+0,l+0,m) * kernel(1,0,0) +
input(i,j+2,k+0,l+0,m) * kernel(2,0,0) +
input(i,j+0,k+1,l+0,m) * kernel(0,1,0) +
input(i,j+1,k+1,l+0,m) * kernel(1,1,0) +
input(i,j+2,k+1,l+0,m) * kernel(2,1,0) +
input(i,j+0,k+2,l+0,m) * kernel(0,2,0) +
input(i,j+1,k+2,l+0,m) * kernel(1,2,0) +
input(i,j+2,k+2,l+0,m) * kernel(2,2,0) +
input(i,j+0,k+3,l+0,m) * kernel(0,3,0) +
input(i,j+1,k+3,l+0,m) * kernel(1,3,0) +
input(i,j+2,k+3,l+0,m) * kernel(2,3,0) +
input(i,j+0,k+0,l+1,m) * kernel(0,0,1) +
input(i,j+1,k+0,l+1,m) * kernel(1,0,1) +
input(i,j+2,k+0,l+1,m) * kernel(2,0,1) +
input(i,j+0,k+1,l+1,m) * kernel(0,1,1) +
input(i,j+1,k+1,l+1,m) * kernel(1,1,1) +
input(i,j+2,k+1,l+1,m) * kernel(2,1,1) +
input(i,j+0,k+2,l+1,m) * kernel(0,2,1) +
input(i,j+1,k+2,l+1,m) * kernel(1,2,1) +
input(i,j+2,k+2,l+1,m) * kernel(2,2,1) +
input(i,j+0,k+3,l+1,m) * kernel(0,3,1) +
input(i,j+1,k+3,l+1,m) * kernel(1,3,1) +
input(i,j+2,k+3,l+1,m) * kernel(2,3,1);
VERIFY_IS_APPROX(result, expected);
}
}
@ -414,91 +509,6 @@ static void test_cuda_convolution_3d()
}
}

static float* CudaCopyFloat(float* data, int size) {
const int nbytes = size * sizeof(float);
float* result = NULL;
if (cudaMalloc((void**)(&result), nbytes) != cudaSuccess) {
return NULL;
} else {
if (data != NULL) {
cudaMemcpy(result, data, nbytes, cudaMemcpyHostToDevice);
}
return result;
}
}

static void test_cuda_constant_broadcast()
{
cudaStream_t stream;
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Tensor<float, 1> t1(10);
for (int i = 0; i < 10; ++i) {
t1(i) = 10.0f * i;
}
float* t1_cuda = CudaCopyFloat(t1.data(), t1.size());
Eigen::TensorMap<Eigen::Tensor<float, 1> > t1_gpu(t1_cuda, 10);

Tensor<float, 1> t2(1);
t2 = t2.constant(20.0f);
float* t2_cuda = CudaCopyFloat(t2.data(), t2.size());
Eigen::TensorMap<Eigen::TensorFixedSize<float, Sizes<1> > > t2_gpu(t2_cuda, 1);

float* t3_cuda = CudaCopyFloat(NULL, 10);
Eigen::TensorMap<Eigen::Tensor<float, 1> > t3_gpu(t3_cuda, 10);

t3_gpu.device(gpu_device) =
t1_gpu + t2_gpu.broadcast(Eigen::array<int, 1>(10));

Eigen::Tensor<float, 1> t3(10);
cudaMemcpy(t3.data(), t3_gpu.data(), 10 * sizeof(float),
cudaMemcpyDeviceToHost);

for (int i = 0; i < 10; ++i) {
VERIFY_IS_APPROX(t3(i), t1(i) + t2(0));
}
}

void test_cuda_cast()
{
Tensor<double, 3> in(Eigen::array<int, 3>(72,53,97));
Tensor<float, 3> out(Eigen::array<int, 3>(72,53,97));
in.setRandom();

std::size_t in_bytes = in.size() * sizeof(double);
std::size_t out_bytes = out.size() * sizeof(float);

double* d_in;
float* d_out;
cudaMalloc((void**)(&d_in), in_bytes);
cudaMalloc((void**)(&d_out), out_bytes);

cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice);

cudaStream_t stream;
assert(cudaStreamCreate(&stream) == cudaSuccess);
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<double, 3> > gpu_in(d_in, Eigen::array<int, 3>(72,53,97));
Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(72,53,97));

gpu_out.device(gpu_device) = gpu_in.template cast<float>();

assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

for (int i = 0; i < 72; ++i) {
for (int j = 0; j < 53; ++j) {
for (int k = 0; k < 97; ++k) {
VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), static_cast<float>(in(Eigen::array<int, 3>(i,j,k))));
}
}
}
}

void test_cxx11_tensor_cuda()
{
CALL_SUBTEST(test_cuda_elementwise_small());
@ -506,9 +516,12 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST(test_cuda_reduction());
CALL_SUBTEST(test_cuda_contraction<ColMajor>());
CALL_SUBTEST(test_cuda_contraction<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_1d());
CALL_SUBTEST(test_cuda_convolution_2d());
CALL_SUBTEST(test_cuda_convolution_3d());
CALL_SUBTEST(test_cuda_constant_broadcast());
CALL_SUBTEST(test_cuda_cast());
CALL_SUBTEST(test_cuda_convolution_1d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_1d<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d());
CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d());
CALL_SUBTEST(test_cuda_convolution_2d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>());
}
Some files were not shown because too many files have changed in this diff.