mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-02 00:34:14 +08:00
Merged eigen/eigen into default
This commit is contained in:
commit
7ff26ddcbb
@ -972,7 +972,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
|||||||
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
|
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
|
||||||
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
|
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
|
||||||
internal::prefetch(blA+(3*K+16)*LhsProgress); \
|
internal::prefetch(blA+(3*K+16)*LhsProgress); \
|
||||||
if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
|
if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \
|
||||||
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
|
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
|
||||||
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
|
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
|
||||||
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
|
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
|
||||||
|
@ -786,30 +786,38 @@ class SparseMatrix
|
|||||||
EIGEN_DBG_SPARSE(
|
EIGEN_DBG_SPARSE(
|
||||||
s << "Nonzero entries:\n";
|
s << "Nonzero entries:\n";
|
||||||
if(m.isCompressed())
|
if(m.isCompressed())
|
||||||
|
{
|
||||||
for (Index i=0; i<m.nonZeros(); ++i)
|
for (Index i=0; i<m.nonZeros(); ++i)
|
||||||
s << "(" << m.m_data.value(i) << "," << m.m_data.index(i) << ") ";
|
s << "(" << m.m_data.value(i) << "," << m.m_data.index(i) << ") ";
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
for (Index i=0; i<m.outerSize(); ++i)
|
for (Index i=0; i<m.outerSize(); ++i)
|
||||||
{
|
{
|
||||||
Index p = m.m_outerIndex[i];
|
Index p = m.m_outerIndex[i];
|
||||||
Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
|
Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
|
||||||
Index k=p;
|
Index k=p;
|
||||||
for (; k<pe; ++k)
|
for (; k<pe; ++k) {
|
||||||
s << "(" << m.m_data.value(k) << "," << m.m_data.index(k) << ") ";
|
s << "(" << m.m_data.value(k) << "," << m.m_data.index(k) << ") ";
|
||||||
for (; k<m.m_outerIndex[i+1]; ++k)
|
}
|
||||||
|
for (; k<m.m_outerIndex[i+1]; ++k) {
|
||||||
s << "(_,_) ";
|
s << "(_,_) ";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
s << std::endl;
|
s << std::endl;
|
||||||
s << std::endl;
|
s << std::endl;
|
||||||
s << "Outer pointers:\n";
|
s << "Outer pointers:\n";
|
||||||
for (Index i=0; i<m.outerSize(); ++i)
|
for (Index i=0; i<m.outerSize(); ++i) {
|
||||||
s << m.m_outerIndex[i] << " ";
|
s << m.m_outerIndex[i] << " ";
|
||||||
|
}
|
||||||
s << " $" << std::endl;
|
s << " $" << std::endl;
|
||||||
if(!m.isCompressed())
|
if(!m.isCompressed())
|
||||||
{
|
{
|
||||||
s << "Inner non zeros:\n";
|
s << "Inner non zeros:\n";
|
||||||
for (Index i=0; i<m.outerSize(); ++i)
|
for (Index i=0; i<m.outerSize(); ++i) {
|
||||||
s << m.m_innerNonZeros[i] << " ";
|
s << m.m_innerNonZeros[i] << " ";
|
||||||
|
}
|
||||||
s << " $" << std::endl;
|
s << " $" << std::endl;
|
||||||
}
|
}
|
||||||
s << std::endl;
|
s << std::endl;
|
||||||
|
@ -114,12 +114,12 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
|||||||
KMADD(c0, a1, b10, t0) \
|
KMADD(c0, a1, b10, t0) \
|
||||||
KMADD(c1, a1, b11, t1) \
|
KMADD(c1, a1, b11, t1) \
|
||||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||||
if(RK==4) KMADD(c0, a2, b20, t0) \
|
if(RK==4){ KMADD(c0, a2, b20, t0) }\
|
||||||
if(RK==4) KMADD(c1, a2, b21, t1) \
|
if(RK==4){ KMADD(c1, a2, b21, t1) }\
|
||||||
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
|
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
|
||||||
if(RK==4) KMADD(c0, a3, b30, t0) \
|
if(RK==4){ KMADD(c0, a3, b30, t0) }\
|
||||||
if(RK==4) KMADD(c1, a3, b31, t1) \
|
if(RK==4){ KMADD(c1, a3, b31, t1) }\
|
||||||
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
|
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
|
||||||
pstore(C0+i+(I)*PacketSize, c0); \
|
pstore(C0+i+(I)*PacketSize, c0); \
|
||||||
pstore(C1+i+(I)*PacketSize, c1)
|
pstore(C1+i+(I)*PacketSize, c1)
|
||||||
|
|
||||||
@ -131,6 +131,7 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
|||||||
prefetch((A1+i+(5)*PacketSize));
|
prefetch((A1+i+(5)*PacketSize));
|
||||||
if(RK==4) prefetch((A2+i+(5)*PacketSize));
|
if(RK==4) prefetch((A2+i+(5)*PacketSize));
|
||||||
if(RK==4) prefetch((A3+i+(5)*PacketSize));
|
if(RK==4) prefetch((A3+i+(5)*PacketSize));
|
||||||
|
|
||||||
WORK(0);
|
WORK(0);
|
||||||
WORK(1);
|
WORK(1);
|
||||||
WORK(2);
|
WORK(2);
|
||||||
@ -208,10 +209,10 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const
|
|||||||
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
|
||||||
KMADD(c0, a1, b10, t0) \
|
KMADD(c0, a1, b10, t0) \
|
||||||
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
|
||||||
if(RK==4) KMADD(c0, a2, b20, t0) \
|
if(RK==4){ KMADD(c0, a2, b20, t0) }\
|
||||||
if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
|
if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
|
||||||
if(RK==4) KMADD(c0, a3, b30, t0) \
|
if(RK==4){ KMADD(c0, a3, b30, t0) }\
|
||||||
if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
|
if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
|
||||||
pstore(C0+i+(I)*PacketSize, c0);
|
pstore(C0+i+(I)*PacketSize, c0);
|
||||||
|
|
||||||
// agressive vectorization and peeling
|
// agressive vectorization and peeling
|
||||||
|
@ -90,7 +90,7 @@ inline void on_temporary_creation(long int size) {
|
|||||||
#define VERIFY_EVALUATION_COUNT(XPR,N) {\
|
#define VERIFY_EVALUATION_COUNT(XPR,N) {\
|
||||||
nb_temporaries = 0; \
|
nb_temporaries = 0; \
|
||||||
XPR; \
|
XPR; \
|
||||||
if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \
|
if(nb_temporaries!=N) { std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; }\
|
||||||
VERIFY( (#XPR) && nb_temporaries==N ); \
|
VERIFY( (#XPR) && nb_temporaries==N ); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user