Pulled latest updates from trunk

commit 92ceb02c6d
Benoit Steiner, 2015-02-19 11:59:52 -08:00

29 changed files with 327 additions and 104 deletions

View File

@@ -277,6 +277,7 @@ class CholmodBase : public SparseSolverBase<Derived>
     if(!x_cd)
     {
       this->m_info = NumericalIssue;
+      return;
     }
     // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
     dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
@@ -298,6 +299,7 @@ class CholmodBase : public SparseSolverBase<Derived>
     if(!x_cs)
     {
       this->m_info = NumericalIssue;
+      return;
    }
     // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
     dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
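Without the added return, execution would fall through to the Map/viewAsEigen call right below and dereference a null cholmod result after flagging NumericalIssue. A minimal caller-side sketch of the pattern this protects (assuming CHOLMOD is available; the solver choice and names are illustrative):

#include <Eigen/SparseCore>
#include <Eigen/CholmodSupport>

int solveWithCholmod(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b)
{
  Eigen::CholmodSupernodalLLT<Eigen::SparseMatrix<double> > solver;
  solver.compute(A);
  if (solver.info() != Eigen::Success)
    return 1;                              // factorization failed
  Eigen::VectorXd x = solver.solve(b);
  if (solver.info() != Eigen::Success)
    return 2;                              // solve reported NumericalIssue; x was left untouched
  return 0;
}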

View File

@@ -287,6 +287,21 @@ template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Pack
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
 { return a; }
 
+template<size_t offset, typename Packet>
+struct protate_impl
+{
+  static Packet run(const Packet& a) { return a; }
+};
+
+/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention,
+  * by the given offset, e.g. for offset == 1:
+  * (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1])
+  */
+template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
+{
+  EIGEN_STATIC_ASSERT(offset < unpacket_traits<Packet>::size, ROTATION_BY_ILLEGAL_OFFSET);
+  return offset ? protate_impl<offset, Packet>::run(a) : a;
+}
+
 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
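A scalar model of the protate semantics documented above may help: lane i of the result is lane (i + offset) % N of the input, i.e. a rotation to the right in little-endian lane order. This is an illustration only, not Eigen code:

#include <array>
#include <cassert>
#include <cstddef>

template <std::size_t offset, typename T, std::size_t N>
std::array<T, N> protate_model(const std::array<T, N>& a)
{
  static_assert(offset < N, "rotation by illegal offset");
  std::array<T, N> r;
  for (std::size_t i = 0; i < N; ++i)
    r[i] = a[(i + offset) % N];   // mirrors the vextq_*/swizzle-based specializations below
  return r;
}

int main()
{
  const std::array<float, 4> p = {0.f, 1.f, 2.f, 3.f};  // lanes p[0]..p[3]
  const std::array<float, 4> q = protate_model<1>(p);
  assert(q[0] == 1.f && q[1] == 2.f && q[2] == 3.f && q[3] == 0.f);
  return 0;
}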

View File

@@ -1,5 +1,9 @@
-ADD_SUBDIRECTORY(SSE)
 ADD_SUBDIRECTORY(AltiVec)
-ADD_SUBDIRECTORY(NEON)
 ADD_SUBDIRECTORY(AVX)
+ADD_SUBDIRECTORY(CUDA)
 ADD_SUBDIRECTORY(Default)
+ADD_SUBDIRECTORY(NEON)
+ADD_SUBDIRECTORY(SSE)

View File

@@ -0,0 +1,6 @@
+FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h")
+
+INSTALL(FILES
+  ${Eigen_Core_arch_CUDA_SRCS}
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel
+  )

View File

@@ -309,6 +309,23 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
   a_hi = vget_high_s32(a_r64);
   return vcombine_s32(a_hi, a_lo);
 }
+
+template<size_t offset>
+struct protate_impl<offset, Packet4f>
+{
+  static Packet4f run(const Packet4f& a) {
+    return vextq_f32(a, a, offset);
+  }
+};
+
+template<size_t offset>
+struct protate_impl<offset, Packet4i>
+{
+  static Packet4i run(const Packet4i& a) {
+    return vextq_s32(a, a, offset);
+  }
+};
+
 template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
 template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
@@ -625,6 +642,14 @@ template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { retu
 template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
+
+template<size_t offset>
+struct protate_impl<offset, Packet2d>
+{
+  static Packet2d run(const Packet2d& a) {
+    return vextq_f64(a, a, offset);
+  }
+};
+
 template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
 
 #if EIGEN_COMP_CLANG && defined(__apple_build_version__)

View File

@@ -138,7 +138,6 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
 #ifdef EIGEN_VECTORIZE_SSE4_1
   fx = _mm_floor_ps(fx);
 #else
-  tmp = _mm_setzero_ps();
   emm0 = _mm_cvttps_epi32(fx);
   tmp = _mm_cvtepi32_ps(emm0);
   /* if greater, subtract 1 */
@@ -207,7 +206,6 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
 #ifdef EIGEN_VECTORIZE_SSE4_1
   fx = _mm_floor_pd(fx);
 #else
-  tmp = _mm_setzero_pd();
   emm0 = _mm_cvttpd_epi32(fx);
   tmp = _mm_cvtepi32_pd(emm0);
   /* if greater, subtract 1 */

View File

@@ -462,6 +462,29 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
 template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
 { return _mm_shuffle_epi32(a,0x1B); }
 
+template<size_t offset>
+struct protate_impl<offset, Packet4f>
+{
+  static Packet4f run(const Packet4f& a) {
+    return vec4f_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
+  }
+};
+
+template<size_t offset>
+struct protate_impl<offset, Packet4i>
+{
+  static Packet4i run(const Packet4i& a) {
+    return vec4i_swizzle1(a, offset, (offset + 1) % 4, (offset + 2) % 4, (offset + 3) % 4);
+  }
+};
+
+template<size_t offset>
+struct protate_impl<offset, Packet2d>
+{
+  static Packet2d run(const Packet2d& a) {
+    return vec2d_swizzle1(a, offset, (offset + 1) % 2);
+  }
+};
+
 template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
 {

View File

@@ -79,23 +79,37 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
   * - the number of scalars that fit into a packet (when vectorization is enabled).
   *
   * \sa setCpuCacheSizes */
-#define CEIL(a, b) ((a)+(b)-1)/(b)
 
-template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
-void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
+template<typename LhsScalar, typename RhsScalar, int KcFactor>
+void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
 {
+  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+  EIGEN_UNUSED_VARIABLE(num_threads);
+  enum {
+    kr = 16,
+    mr = Traits::mr,
+    nr = Traits::nr
+  };
+  k = std::min<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
+  if (k > kr) k -= k % kr;
+  m = std::min<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
+  if (m > mr) m -= m % mr;
+  n = std::min<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
+  if (n > nr) n -= n % nr;
+  return;
+#endif
+
   // Explanations:
-  // Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
-  // mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
-  // per kc x nr vertical small panels where nr is the blocking size along the n dimension
-  // at the register level. For vectorization purpose, these small vertical panels are unpacked,
-  // e.g., each coefficient is replicated to fit a packet. This small vertical panel has to
-  // stay in L1 cache.
+  // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
+  // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
+  // per mr x kc horizontal small panels where mr is the blocking size along the m dimension
+  // at the register level. This small horizontal panel has to stay within L1 cache.
   std::ptrdiff_t l1, l2, l3;
   manage_caching_sizes(GetAction, &l1, &l2, &l3);
 
   if (num_threads > 1) {
-    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
     typedef typename Traits::ResScalar ResScalar;
     enum {
       kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
@@ -108,32 +122,32 @@ void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_
       nr = Traits::nr,
       nr_mask = (0xffffffff/nr)*nr
     };
-    SizeType k_cache = (l1-ksub)/kdiv;
+    Index k_cache = (l1-ksub)/kdiv;
     if (k_cache < k) {
       k = k_cache & k_mask;
-      eigen_assert(k > 0);
+      eigen_internal_assert(k > 0);
     }
 
-    SizeType n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
-    SizeType n_per_thread = CEIL(n, num_threads);
+    Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
+    Index n_per_thread = numext::div_ceil(n, num_threads);
     if (n_cache <= n_per_thread) {
       // Don't exceed the capacity of the l2 cache.
-      eigen_assert(n_cache >= static_cast<SizeType>(nr));
+      eigen_internal_assert(n_cache >= static_cast<Index>(nr));
       n = n_cache & nr_mask;
-      eigen_assert(n > 0);
+      eigen_internal_assert(n > 0);
     } else {
-      n = (std::min<SizeType>)(n, (n_per_thread + nr - 1) & nr_mask);
+      n = (std::min<Index>)(n, (n_per_thread + nr - 1) & nr_mask);
     }
 
     if (l3 > l2) {
       // l3 is shared between all cores, so we'll give each thread its own chunk of l3.
-      SizeType m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
-      SizeType m_per_thread = CEIL(m, num_threads);
-      if(m_cache < m_per_thread && m_cache >= static_cast<SizeType>(mr)) {
+      Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
+      Index m_per_thread = numext::div_ceil(m, num_threads);
+      if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
         m = m_cache & mr_mask;
-        eigen_assert(m > 0);
+        eigen_internal_assert(m > 0);
       } else {
-        m = (std::min<SizeType>)(m, (m_per_thread + mr - 1) & mr_mask);
+        m = (std::min<Index>)(m, (m_per_thread + mr - 1) & mr_mask);
       }
     }
   }
@@ -141,19 +155,19 @@ void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_
     // In unit tests we do not want to use extra large matrices,
     // so we reduce the block size to check the blocking strategy is not flawed
 #ifndef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
-    k = std::min<SizeType>(k,sizeof(LhsScalar)<=4 ? 360 : 240);
-    n = std::min<SizeType>(n,3840/sizeof(RhsScalar));
-    m = std::min<SizeType>(m,3840/sizeof(RhsScalar));
+    k = std::min<Index>(k,sizeof(LhsScalar)<=4 ? 360 : 240);
+    n = std::min<Index>(n,3840/sizeof(RhsScalar));
+    m = std::min<Index>(m,3840/sizeof(RhsScalar));
 #else
-    k = std::min<SizeType>(k,24);
-    n = std::min<SizeType>(n,384/sizeof(RhsScalar));
-    m = std::min<SizeType>(m,384/sizeof(RhsScalar));
+    k = std::min<Index>(k,24);
+    n = std::min<Index>(n,384/sizeof(RhsScalar));
+    m = std::min<Index>(m,384/sizeof(RhsScalar));
 #endif
   }
 }
 
-template<typename LhsScalar, typename RhsScalar, typename SizeType>
-inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
+template<typename LhsScalar, typename RhsScalar>
+inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
 {
   computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n, num_threads);
 }
@@ -758,7 +772,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
     const Index peeled_kc = depth & ~(pk-1);
     const Index prefetch_res_offset = 32/sizeof(ResScalar);
-    // const Index depth2 = depth & ~1;
 
     //---------- Process 3 * LhsProgress rows at once ----------
     // This corresponds to 3*LhsProgress x nr register blocks.
     // Usually, makes sense only with FMA
@@ -798,14 +812,45 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
           const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
           prefetch(&blB[0]);
           LhsPacket A0, A1;
 
+#define EIGEN_ARCH_PREFERS_ROTATING_KERNEL EIGEN_ARCH_ARM
+
+#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
+          static const bool UseRotatingKernel =
+            Traits::LhsPacketSize == 4 &&
+            Traits::RhsPacketSize == 4 &&
+            Traits::ResPacketSize == 4;
+#endif
+
           for(Index k=0; k<peeled_kc; k+=pk)
           {
             EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
             RhsPacket B_0, T0;
             LhsPacket A2;
 
-#define EIGEN_GEBGP_ONESTEP(K) \
+#define EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N) \
+            traits.loadRhs(&blB[(N+4*K)*RhsProgress], B_0);
+
+#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
+#define EIGEN_GEBP_ONESTEP_LOADRHS(K,N) \
+            do { \
+              if (UseRotatingKernel) { \
+                if (N == 0) { \
+                  B_0 = pload<RhsPacket>(&blB[(0+4*K)*RhsProgress]); \
+                } else { \
+                  EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers"); \
+                  B_0 = protate<1>(B_0); \
+                } \
+              } else { \
+                EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N); \
+              } \
+            } while (false)
+#else
+#define EIGEN_GEBP_ONESTEP_LOADRHS(K,N) \
+            EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N)
+#endif
+
+#define EIGEN_GEBP_ONESTEP(K) \
             do { \
               EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
               EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
@@ -814,34 +859,34 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
              traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
              traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
              traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
-              traits.loadRhs(&blB[(0+4*K)*RhsProgress], B_0); \
+              EIGEN_GEBP_ONESTEP_LOADRHS(K, 0); \
              traits.madd(A0, B_0, C0, T0); \
              traits.madd(A1, B_0, C4, T0); \
              traits.madd(A2, B_0, C8, B_0); \
-              traits.loadRhs(&blB[1+4*K*RhsProgress], B_0); \
+              EIGEN_GEBP_ONESTEP_LOADRHS(K, 1); \
              traits.madd(A0, B_0, C1, T0); \
              traits.madd(A1, B_0, C5, T0); \
              traits.madd(A2, B_0, C9, B_0); \
-              traits.loadRhs(&blB[2+4*K*RhsProgress], B_0); \
+              EIGEN_GEBP_ONESTEP_LOADRHS(K, 2); \
              traits.madd(A0, B_0, C2, T0); \
              traits.madd(A1, B_0, C6, T0); \
              traits.madd(A2, B_0, C10, B_0); \
-              traits.loadRhs(&blB[3+4*K*RhsProgress], B_0); \
+              EIGEN_GEBP_ONESTEP_LOADRHS(K, 3); \
              traits.madd(A0, B_0, C3 , T0); \
              traits.madd(A1, B_0, C7, T0); \
              traits.madd(A2, B_0, C11, B_0); \
              EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
            } while(false)
 
            internal::prefetch(blB + 4 * pk * sizeof(RhsScalar)); /* Bug 953 */
-            EIGEN_GEBGP_ONESTEP(0);
-            EIGEN_GEBGP_ONESTEP(1);
-            EIGEN_GEBGP_ONESTEP(2);
-            EIGEN_GEBGP_ONESTEP(3);
-            EIGEN_GEBGP_ONESTEP(4);
-            EIGEN_GEBGP_ONESTEP(5);
-            EIGEN_GEBGP_ONESTEP(6);
-            EIGEN_GEBGP_ONESTEP(7);
+            EIGEN_GEBP_ONESTEP(0);
+            EIGEN_GEBP_ONESTEP(1);
+            EIGEN_GEBP_ONESTEP(2);
+            EIGEN_GEBP_ONESTEP(3);
+            EIGEN_GEBP_ONESTEP(4);
+            EIGEN_GEBP_ONESTEP(5);
+            EIGEN_GEBP_ONESTEP(6);
+            EIGEN_GEBP_ONESTEP(7);
 
            blB += pk*4*RhsProgress;
            blA += pk*3*Traits::LhsProgress;
@@ -853,12 +898,41 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
           {
             RhsPacket B_0, T0;
             LhsPacket A2;
-            EIGEN_GEBGP_ONESTEP(0);
+            EIGEN_GEBP_ONESTEP(0);
             blB += 4*RhsProgress;
             blA += 3*Traits::LhsProgress;
           }
-#undef EIGEN_GEBGP_ONESTEP
+#undef EIGEN_GEBP_ONESTEP
+#undef EIGEN_GEBP_ONESTEP_LOADRHS
+#undef EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING
+
+#if EIGEN_ARCH_PREFERS_ROTATING_KERNEL
+          if (UseRotatingKernel) {
+#define EIGEN_GEBP_UNROTATE_RESULT(res0, res1, res2, res3) \
+            do { \
+              PacketBlock<ResPacket> resblock; \
+              resblock.packet[0] = res0; \
+              resblock.packet[1] = res1; \
+              resblock.packet[2] = res2; \
+              resblock.packet[3] = res3; \
+              ptranspose(resblock); \
+              resblock.packet[3] = protate<1>(resblock.packet[3]); \
+              resblock.packet[2] = protate<2>(resblock.packet[2]); \
+              resblock.packet[1] = protate<3>(resblock.packet[1]); \
+              ptranspose(resblock); \
+              res0 = resblock.packet[0]; \
+              res1 = resblock.packet[1]; \
+              res2 = resblock.packet[2]; \
+              res3 = resblock.packet[3]; \
+            } while (false)
+
+            EIGEN_GEBP_UNROTATE_RESULT(C0, C1, C2, C3);
+            EIGEN_GEBP_UNROTATE_RESULT(C4, C5, C6, C7);
+            EIGEN_GEBP_UNROTATE_RESULT(C8, C9, C10, C11);
+          }
+#endif
 
           ResPacket R0, R1, R2;
           ResPacket alphav = pset1<ResPacket>(alpha);
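The rotating kernel is subtle enough to deserve a scalar sanity check: with one full packet load per step and protate<1> between the four madd groups, accumulator j ends up holding, in lane l, the sum over k of A_k[l] * B_k[(l + j) % 4], and the transpose / protate / transpose fixup above restores the usual layout. A standalone model of exactly that (illustration only, not Eigen code):

#include <array>
#include <cassert>
#include <cstddef>
#include <utility>

typedef std::array<float, 4> Packet;

template <std::size_t offset>
Packet rot(const Packet& a)               // models protate<offset>
{
  Packet r = Packet();
  for (std::size_t i = 0; i < 4; ++i) r[i] = a[(i + offset) % 4];
  return r;
}

void transpose(std::array<Packet, 4>& p)  // models ptranspose on a PacketBlock
{
  for (std::size_t i = 0; i < 4; ++i)
    for (std::size_t j = i + 1; j < 4; ++j) std::swap(p[i][j], p[j][i]);
}

int main()
{
  std::array<Packet, 4> C = {};  // broadcast-kernel accumulators (reference)
  std::array<Packet, 4> R = {};  // rotating-kernel accumulators
  for (int k = 0; k < 3; ++k) {
    Packet A, B;
    for (int i = 0; i < 4; ++i) { A[i] = 1.0f + 4*k + i; B[i] = 2.0f + k + 3*i; }
    for (int j = 0; j < 4; ++j)          // reference: one broadcast load per column j
      for (int l = 0; l < 4; ++l) C[j][l] += A[l] * B[j];
    Packet B0 = B;                       // rotating: one full load, then three rotations
    for (int j = 0; j < 4; ++j) {
      if (j) B0 = rot<1>(B0);
      for (int l = 0; l < 4; ++l) R[j][l] += A[l] * B0[l];
    }
  }
  // Fixup, step for step as in EIGEN_GEBP_UNROTATE_RESULT.
  transpose(R);
  R[3] = rot<1>(R[3]);
  R[2] = rot<2>(R[2]);
  R[1] = rot<3>(R[1]);
  transpose(R);
  for (int j = 0; j < 4; ++j)
    for (int l = 0; l < 4; ++l) assert(R[j][l] == C[j][l]);
  return 0;
}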
@@ -1788,14 +1862,14 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Co
       for(; k<peeled_k; k+=PacketSize) {
         PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
         kernel.packet[0] = dm0.loadPacket(k);
-        kernel.packet[1] = dm1.loadPacket(k);
-        kernel.packet[2] = dm2.loadPacket(k);
-        kernel.packet[3] = dm3.loadPacket(k);
+        kernel.packet[1%PacketSize] = dm1.loadPacket(k);
+        kernel.packet[2%PacketSize] = dm2.loadPacket(k);
+        kernel.packet[3%PacketSize] = dm3.loadPacket(k);
         ptranspose(kernel);
         pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));
-        pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1]));
-        pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2]));
-        pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3]));
+        pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
+        pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));
+        pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));
         count+=4*PacketSize;
       }
     }

View File

@@ -217,8 +217,9 @@ struct gemm_functor
     : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
   {}
 
-  void initParallelSession() const
+  void initParallelSession(Index num_threads) const
   {
+    m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
     m_blocking.allocateA();
   }
@@ -276,7 +277,7 @@ class level3_blocking
 };
 
 template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
-class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
   : public level3_blocking<
       typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
       typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
@@ -299,7 +300,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
   public:
 
-    gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, int /*num_threads*/, bool /*full_rows = false*/)
+    gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
     {
       this->m_mc = ActualRows;
      this->m_nc = ActualCols;
@@ -307,6 +308,9 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
       this->m_blockA = m_staticA;
       this->m_blockB = m_staticB;
     }
 
+    void initParallel(Index, Index, Index, Index)
+    {}
+
     inline void allocateA() {}
     inline void allocateB() {}
@@ -331,7 +335,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
   public:
 
-    gemm_blocking_space(Index rows, Index cols, Index depth, int num_threads, bool l3_blocking)
+    gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
     {
       this->m_mc = Transpose ? cols : rows;
       this->m_nc = Transpose ? rows : cols;
@@ -351,6 +355,19 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
         m_sizeA = this->m_mc * this->m_kc;
         m_sizeB = this->m_kc * this->m_nc;
       }
 
+    void initParallel(Index rows, Index cols, Index depth, Index num_threads)
+    {
+      this->m_mc = Transpose ? cols : rows;
+      this->m_nc = Transpose ? rows : cols;
+      this->m_kc = depth;
+
+      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
+      Index m = this->m_mc;
+      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
+      m_sizeA = this->m_mc * this->m_kc;
+      m_sizeB = this->m_kc * this->m_nc;
+    }
+
     void allocateA()
     {

View File

@@ -120,25 +120,28 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
     return func(0,rows, 0,cols);
 
   Eigen::initParallel();
-  func.initParallelSession();
+  func.initParallelSession(threads);
 
   if(transpose)
     std::swap(rows,cols);
 
-  Index blockCols = (cols / threads) & ~Index(0x3);
-  Index blockRows = (rows / threads);
-  blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
-
   ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
 
   #pragma omp parallel num_threads(threads)
   {
     Index i = omp_get_thread_num();
+    // Note that the actual number of threads might be lower than the number of requested ones.
+    Index actual_threads = omp_get_num_threads();
+
+    Index blockCols = (cols / actual_threads) & ~Index(0x3);
+    Index blockRows = (rows / actual_threads);
+    blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
+
     Index r0 = i*blockRows;
-    Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
+    Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
 
     Index c0 = i*blockCols;
-    Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
+    Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
 
     info[i].lhs_start = r0;
     info[i].lhs_length = actualBlockRows;
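Moving the block-size computation inside the parallel region, and the new initParallelSession(threads) hook, both address the same pitfall: with dynamic teams the runtime may grant fewer threads than requested, and only omp_get_num_threads() called from inside the region reports the real team size. A minimal standalone illustration (not Eigen code; requires OpenMP):

#include <omp.h>
#include <cstdio>

int main()
{
  omp_set_dynamic(1);            // let the runtime shrink the team, as in bug 714
  const int requested = 8;
  #pragma omp parallel num_threads(requested)
  {
    #pragma omp single
    std::printf("requested %d threads, got %d\n", requested, omp_get_num_threads());
  }
  return 0;
}

Splitting rows and cols by the requested count while indexing workers with omp_get_thread_num() over a smaller team would leave trailing blocks unassigned, which is why blockRows/blockCols are now derived from actual_threads.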

View File

@@ -166,7 +166,7 @@ class BlasLinearMapper {
     return ploadt<HalfPacket, AlignmentType>(m_data + i);
   }
 
-  EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
+  EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
     pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
   }

View File

@@ -382,6 +382,11 @@
 #define EIGEN_HAVE_RVALUE_REFERENCES
 #endif
 
+// Does the compiler support result_of?
+#if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))
+#define EIGEN_HAS_STD_RESULT_OF 1
+#endif
+
 // Does the compiler support variadic templates?
 #if __cplusplus > 199711L
 #define EIGEN_HAS_VARIADIC_TEMPLATES 1

View File

@@ -165,6 +165,7 @@ template<typename T> struct result_of {};
 struct has_none {int a[1];};
 struct has_std_result_type {int a[2];};
 struct has_tr1_result {int a[3];};
+struct has_cxx_eleven_result {int a[4];};
 
 template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
 struct unary_result_of_select {typedef ArgType type;};
@@ -175,13 +176,22 @@ struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typed
 template<typename Func, typename ArgType>
 struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef typename Func::template result<Func(ArgType)>::type type;};
 
+#ifdef EIGEN_HAS_STD_RESULT_OF
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_cxx_eleven_result)> {typedef typename std::result_of<Func(ArgType)>::type type;};
+#endif
+
 template<typename Func, typename ArgType>
 struct result_of<Func(ArgType)> {
     template<typename T>
     static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
     template<typename T>
     static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
+#ifdef EIGEN_HAS_STD_RESULT_OF
+    template<typename T>
+    static has_cxx_eleven_result testFunctor(T const *, typename std::result_of<T(ArgType)>::type const * = 0);
+#endif
     static has_none testFunctor(...);
 
     // note that the following indirection is needed for gcc-3.3
     enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
@@ -199,13 +209,23 @@ template<typename Func, typename ArgType0, typename ArgType1>
 struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>
 {typedef typename Func::template result<Func(ArgType0,ArgType1)>::type type;};
 
+#ifdef EIGEN_HAS_STD_RESULT_OF
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_cxx_eleven_result)>
+{typedef typename std::result_of<Func(ArgType0, ArgType1)>::type type;};
+#endif
+
 template<typename Func, typename ArgType0, typename ArgType1>
 struct result_of<Func(ArgType0,ArgType1)> {
     template<typename T>
     static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
     template<typename T>
     static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
+#ifdef EIGEN_HAS_STD_RESULT_OF
+    template<typename T>
+    static has_cxx_eleven_result testFunctor(T const *, typename std::result_of<T(ArgType0, ArgType1)>::type const * = 0);
+#endif
     static has_none testFunctor(...);
 
     // note that the following indirection is needed for gcc-3.3
     enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
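All the detection branches above rely on the same sizeof trick: each testFunctor overload returns a struct of distinct size, overload resolution picks the best viable candidate at compile time, and sizeof of the never-evaluated call selects the matching *_result_of_select specialization. A minimal sketch of the mechanism, kept C++03 like the surrounding code (illustrative, not Eigen code):

struct yes { char a[1]; };
struct no  { char a[2]; };

template<typename T>
yes probe(T const *, typename T::result_type const * = 0);  // viable only if T::result_type exists
no  probe(...);                                             // fallback, worst possible match

struct WithResultType    { typedef int result_type; };
struct WithoutResultType {};

// Compile-time checks via array sizes: a negative size would be an error.
int check_detected[sizeof(probe(static_cast<WithResultType*>(0)))    == sizeof(yes) ? 1 : -1];
int check_fallback[sizeof(probe(static_cast<WithoutResultType*>(0))) == sizeof(no)  ? 1 : -1];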
@@ -284,6 +304,14 @@ template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b =
 template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
 #endif
 
+// Integer division with rounding up.
+// T is assumed to be an integer type with a>=0, and b>0
+template<typename T>
+T div_ceil(const T &a, const T &b)
+{
+  return (a+b-1) / b;
+}
+
 } // end namespace numext
 
 } // end namespace Eigen
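A quick check of the rounding behavior of div_ceil (illustration; assumes a >= 0 and b > 0 as stated above):

#include <cassert>

template<typename T>
T div_ceil(const T &a, const T &b) { return (a+b-1) / b; }  // same formula as above

int main()
{
  assert(div_ceil(7, 3) == 3);  // 7/3 rounded up
  assert(div_ceil(6, 3) == 2);  // exact division is unchanged
  assert(div_ceil(0, 3) == 0);  // zero numerator stays zero
  return 0;
}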

View File

@@ -93,7 +93,8 @@
         THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
         OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
         IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
-        STORAGE_LAYOUT_DOES_NOT_MATCH
+        STORAGE_LAYOUT_DOES_NOT_MATCH,
+        ROTATION_BY_ILLEGAL_OFFSET
       };
     };

View File

@@ -292,7 +292,8 @@ const typename SparseMatrixBase<Derived>::ConstInnerVectorReturnType SparseMatri
   * is col-major (resp. row-major).
   */
 template<typename Derived>
-Block<Derived,Dynamic,Dynamic,true> SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize)
+typename SparseMatrixBase<Derived>::InnerVectorsReturnType
+SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize)
 {
   return Block<Derived,Dynamic,Dynamic,true>(derived(),
                                              IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart,
@@ -304,7 +305,8 @@ Block<Derived,Dynamic,Dynamic,true> SparseMatrixBase<Derived>::innerVectors(Inde
   * is col-major (resp. row-major). Read-only.
   */
 template<typename Derived>
-const Block<const Derived,Dynamic,Dynamic,true> SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) const
+const typename SparseMatrixBase<Derived>::ConstInnerVectorsReturnType
+SparseMatrixBase<Derived>::innerVectors(Index outerStart, Index outerSize) const
 {
   return Block<const Derived,Dynamic,Dynamic,true>(derived(),
                                                    IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart,

View File

@@ -467,6 +467,8 @@ class SparseMatrix
       if(isCompressed())
         return;
 
+      eigen_internal_assert(m_outerIndex!=0 && m_outerSize>0);
+
       Index oldStart = m_outerIndex[1];
       m_outerIndex[1] = m_innerNonZeros[0];
       for(Index j=1; j<m_outerSize; ++j)

View File

@@ -321,8 +321,10 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
     const ConstInnerVectorReturnType innerVector(Index outer) const;
 
     // set of inner-vectors
-    Block<Derived,Dynamic,Dynamic,true> innerVectors(Index outerStart, Index outerSize);
-    const Block<const Derived,Dynamic,Dynamic,true> innerVectors(Index outerStart, Index outerSize) const;
+    typedef Block<Derived,Dynamic,Dynamic,true> InnerVectorsReturnType;
+    typedef Block<const Derived,Dynamic,Dynamic,true> ConstInnerVectorsReturnType;
+    InnerVectorsReturnType innerVectors(Index outerStart, Index outerSize);
+    const ConstInnerVectorsReturnType innerVectors(Index outerStart, Index outerSize) const;
 
     DenseMatrixType toDense() const
     {

View File

@@ -100,7 +100,8 @@ add_custom_target(doc ALL
   COMMAND ${CMAKE_COMMAND} -E copy ${Eigen_BINARY_DIR}/doc/html/group__TopicUnalignedArrayAssert.html ${Eigen_BINARY_DIR}/doc/html/TopicUnalignedArrayAssert.html
   COMMAND ${CMAKE_COMMAND} -E rename html eigen-doc
   COMMAND ${CMAKE_COMMAND} -E remove eigen-doc/eigen-doc.tgz
-  COMMAND ${CMAKE_COMMAND} -E tar cfz eigen-doc/eigen-doc.tgz eigen-doc
+  COMMAND ${CMAKE_COMMAND} -E tar cfz eigen-doc.tgz eigen-doc
+  COMMAND ${CMAKE_COMMAND} -E rename eigen-doc.tgz eigen-doc/eigen-doc.tgz
   COMMAND ${CMAKE_COMMAND} -E rename eigen-doc html
   WORKING_DIRECTORY ${Eigen_BINARY_DIR}/doc)

View File

@@ -380,10 +380,14 @@ void test_cholesky()
     CALL_SUBTEST_3( cholesky_definiteness(Matrix2d()) );
     CALL_SUBTEST_4( cholesky(Matrix3f()) );
     CALL_SUBTEST_5( cholesky(Matrix4d()) );
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
     CALL_SUBTEST_2( cholesky(MatrixXd(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_6( cholesky_cplx(MatrixXcd(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
   }
 
   CALL_SUBTEST_4( cholesky_verify_assert<Matrix3f>() );
@@ -395,6 +399,5 @@ void test_cholesky()
   CALL_SUBTEST_9( LLT<MatrixXf>(10) );
   CALL_SUBTEST_9( LDLT<MatrixXf>(10) );
 
-  TEST_SET_BUT_UNUSED_VARIABLE(s)
   TEST_SET_BUT_UNUSED_VARIABLE(nb_temporaries)
 }

View File

@@ -108,6 +108,7 @@ void test_eigensolver_complex()
     CALL_SUBTEST_2( eigensolver(MatrixXcd(s,s)) );
     CALL_SUBTEST_3( eigensolver(Matrix<std::complex<float>, 1, 1>()) );
     CALL_SUBTEST_4( eigensolver(Matrix3f()) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
   }
   CALL_SUBTEST_1( eigensolver_verify_assert(Matrix4cf()) );
   s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);

View File

@@ -93,6 +93,7 @@ void test_eigensolver_generic()
     CALL_SUBTEST_1( eigensolver(Matrix4f()) );
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
     CALL_SUBTEST_2( eigensolver(MatrixXd(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
 
     // some trivial but implementation-wise tricky cases
     CALL_SUBTEST_2( eigensolver(MatrixXd(1,1)) );

View File

@@ -154,15 +154,13 @@ void test_eigensolver_selfadjoint()
     CALL_SUBTEST_13( selfadjointeigensolver(Matrix3f()) );
     CALL_SUBTEST_13( selfadjointeigensolver(Matrix3d()) );
     CALL_SUBTEST_2( selfadjointeigensolver(Matrix4d()) );
-    s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
-    CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(s,s)) );
-    s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
-    CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(s,s)) );
-    s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
-    CALL_SUBTEST_5( selfadjointeigensolver(MatrixXcd(s,s)) );
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
+    CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(s,s)) );
+    CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(s,s)) );
+    CALL_SUBTEST_5( selfadjointeigensolver(MatrixXcd(s,s)) );
     CALL_SUBTEST_9( selfadjointeigensolver(Matrix<std::complex<double>,Dynamic,Dynamic,RowMajor>(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
 
     // some trivial but implementation-wise tricky cases
     CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(1,1)) );

View File

@@ -102,12 +102,16 @@ void test_inverse()
     CALL_SUBTEST_3( inverse(Matrix3f()) );
     CALL_SUBTEST_4( inverse(Matrix4f()) );
     CALL_SUBTEST_4( inverse(Matrix<float,4,4,DontAlign>()) );
+
     s = internal::random<int>(50,320);
     CALL_SUBTEST_5( inverse(MatrixXf(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     s = internal::random<int>(25,100);
     CALL_SUBTEST_6( inverse(MatrixXcd(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     CALL_SUBTEST_7( inverse(Matrix4d()) );
     CALL_SUBTEST_7( inverse(Matrix<double,4,4,DontAlign>()) );
   }
-  TEST_SET_BUT_UNUSED_VARIABLE(s)
 }

View File

@@ -64,8 +64,7 @@ void test_product_large()
 #endif
 
   // Regression test for bug 714:
-#ifdef EIGEN_HAS_OPENMP
-  std::cout << "Testing omp_set_dynamic(1)\n";
+#if defined EIGEN_HAS_OPENMP
   omp_set_dynamic(1);
   for(int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_6( product(Matrix<float,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );

View File

@@ -129,11 +129,12 @@ void test_product_notemporary()
   for(int i = 0; i < g_repeat; i++) {
     s = internal::random<int>(16,EIGEN_TEST_MAX_SIZE);
     CALL_SUBTEST_1( product_notemporary(MatrixXf(s, s)) );
-    s = internal::random<int>(16,EIGEN_TEST_MAX_SIZE);
     CALL_SUBTEST_2( product_notemporary(MatrixXd(s, s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     s = internal::random<int>(16,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_3( product_notemporary(MatrixXcf(s,s)) );
-    s = internal::random<int>(16,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_4( product_notemporary(MatrixXcd(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
   }
 }

View File

@@ -67,14 +67,21 @@ void test_product_selfadjoint()
     CALL_SUBTEST_1( product_selfadjoint(Matrix<float, 1, 1>()) );
     CALL_SUBTEST_2( product_selfadjoint(Matrix<float, 2, 2>()) );
     CALL_SUBTEST_3( product_selfadjoint(Matrix3d()) );
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_4( product_selfadjoint(MatrixXcf(s, s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_5( product_selfadjoint(MatrixXcd(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
     CALL_SUBTEST_6( product_selfadjoint(MatrixXd(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
     CALL_SUBTEST_7( product_selfadjoint(Matrix<float,Dynamic,Dynamic,RowMajor>(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
   }
-  TEST_SET_BUT_UNUSED_VARIABLE(s)
 }

View File

@@ -125,11 +125,12 @@ void test_product_syrk()
     int s;
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
     CALL_SUBTEST_1( syrk(MatrixXf(s, s)) );
-    s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
     CALL_SUBTEST_2( syrk(MatrixXd(s, s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_3( syrk(MatrixXcf(s, s)) );
-    s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_4( syrk(MatrixXcd(s, s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
   }
 }

View File

@@ -78,12 +78,14 @@ void test_product_trmv()
     CALL_SUBTEST_1( trmv(Matrix<float, 1, 1>()) );
     CALL_SUBTEST_2( trmv(Matrix<float, 2, 2>()) );
     CALL_SUBTEST_3( trmv(Matrix3d()) );
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_4( trmv(MatrixXcf(s,s)) );
-    s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2);
     CALL_SUBTEST_5( trmv(MatrixXcd(s,s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
+
     s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
     CALL_SUBTEST_6( trmv(Matrix<float,Dynamic,Dynamic,RowMajor>(s, s)) );
+    TEST_SET_BUT_UNUSED_VARIABLE(s)
   }
-  TEST_SET_BUT_UNUSED_VARIABLE(s);
 }

View File

@@ -18,8 +18,6 @@
     VERIFY(threw && "should have thrown bad_alloc: " #a); \
   }
 
-typedef DenseIndex Index;
-
 template<typename MatrixType>
 void triggerMatrixBadAlloc(Index rows, Index cols)
 {