mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-06 02:34:05 +08:00
bug #936, patch 1/3: some cleanup and renaming for consistency.
This commit is contained in:
parent
759bd92a85
commit
9f99f61e69
@ -22,8 +22,8 @@ namespace internal {
|
|||||||
#define EIGEN_HAS_FUSED_MADD 1
|
#define EIGEN_HAS_FUSED_MADD 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
#ifndef EIGEN_HAS_FUSED_CJMADD
|
||||||
#define EIGEN_HAS_FUSE_CJMADD 1
|
#define EIGEN_HAS_FUSED_CJMADD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
||||||
|
@ -24,8 +24,8 @@ namespace internal {
|
|||||||
#define EIGEN_HAS_FUSED_MADD 1
|
#define EIGEN_HAS_FUSED_MADD 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
#ifndef EIGEN_HAS_FUSED_CJMADD
|
||||||
#define EIGEN_HAS_FUSE_CJMADD 1
|
#define EIGEN_HAS_FUSED_CJMADD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// FIXME NEON has 16 quad registers, but since the current register allocator
|
// FIXME NEON has 16 quad registers, but since the current register allocator
|
||||||
|
@ -120,8 +120,8 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
|
|||||||
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
|
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef EIGEN_HAS_FUSE_CJMADD
|
#ifdef EIGEN_HAS_FUSED_CJMADD
|
||||||
#define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
#define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// FIXME (a bit overkill maybe ?)
|
// FIXME (a bit overkill maybe ?)
|
||||||
@ -146,8 +146,8 @@ inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
|
|||||||
gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
|
gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
|
#define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
|
||||||
// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
|
// #define CJMADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Vectorization logic
|
/* Vectorization logic
|
||||||
@ -1402,13 +1402,13 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
|
|||||||
|
|
||||||
B_0 = blB[0];
|
B_0 = blB[0];
|
||||||
B_1 = blB[1];
|
B_1 = blB[1];
|
||||||
MADD(cj,A0,B_0,C0, B_0);
|
CJMADD(cj,A0,B_0,C0, B_0);
|
||||||
MADD(cj,A0,B_1,C1, B_1);
|
CJMADD(cj,A0,B_1,C1, B_1);
|
||||||
|
|
||||||
B_0 = blB[2];
|
B_0 = blB[2];
|
||||||
B_1 = blB[3];
|
B_1 = blB[3];
|
||||||
MADD(cj,A0,B_0,C2, B_0);
|
CJMADD(cj,A0,B_0,C2, B_0);
|
||||||
MADD(cj,A0,B_1,C3, B_1);
|
CJMADD(cj,A0,B_1,C3, B_1);
|
||||||
|
|
||||||
blB += 4;
|
blB += 4;
|
||||||
}
|
}
|
||||||
@ -1434,7 +1434,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
|
|||||||
{
|
{
|
||||||
LhsScalar A0 = blA[k];
|
LhsScalar A0 = blA[k];
|
||||||
RhsScalar B_0 = blB[k];
|
RhsScalar B_0 = blB[k];
|
||||||
MADD(cj, A0, B_0, C0, B_0);
|
CJMADD(cj, A0, B_0, C0, B_0);
|
||||||
}
|
}
|
||||||
res[(j2+0)*resStride + i] += alpha*C0;
|
res[(j2+0)*resStride + i] += alpha*C0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user