mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Small cleanup: Get rid of the macros EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD and CJMADD, which were effectively unused, apart from on x86, where the change results in identically performing code.
This commit is contained in:
parent
52a5f98212
commit
bffd267d17
@ -22,10 +22,6 @@ namespace internal {
|
|||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
|
||||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||||
|
@ -28,10 +28,6 @@ namespace internal {
|
|||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||||
#endif
|
#endif
|
||||||
|
@ -24,10 +24,6 @@ namespace internal {
|
|||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||||
#if EIGEN_ARCH_ARM64
|
#if EIGEN_ARCH_ARM64
|
||||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||||
|
@ -22,10 +22,6 @@ namespace internal
|
|||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||||
|
|
||||||
template <typename Scalar, int SVEVectorLength>
|
template <typename Scalar, int SVEVectorLength>
|
||||||
|
@ -22,10 +22,6 @@ namespace internal {
|
|||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||||
#endif
|
#endif
|
||||||
|
@ -349,36 +349,6 @@ inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_
|
|||||||
computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
|
computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
|
||||||
#define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
|
||||||
#else
|
|
||||||
|
|
||||||
// FIXME (a bit overkill maybe ?)
|
|
||||||
|
|
||||||
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
|
|
||||||
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
|
||||||
{
|
|
||||||
c = cj.pmadd(a,b,c);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
|
|
||||||
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
|
||||||
{
|
|
||||||
t = b; t = cj.pmul(a,t); c = padd(c,t);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename CJ, typename A, typename B, typename C, typename T>
|
|
||||||
EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
|
|
||||||
{
|
|
||||||
gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CJMADD(CJ,A,B,C,T) gebp_madd(CJ,A,B,C,T);
|
|
||||||
// #define CJMADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = padd(C,T);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template <typename RhsPacket, typename RhsPacketx4, int registers_taken>
|
template <typename RhsPacket, typename RhsPacketx4, int registers_taken>
|
||||||
struct RhsPanelHelper {
|
struct RhsPanelHelper {
|
||||||
private:
|
private:
|
||||||
@ -2060,14 +2030,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
|||||||
|
|
||||||
B_0 = blB[0];
|
B_0 = blB[0];
|
||||||
B_1 = blB[1];
|
B_1 = blB[1];
|
||||||
CJMADD(cj,A0,B_0,C0, B_0);
|
C0 = cj.pmadd(A0,B_0,C0);
|
||||||
CJMADD(cj,A0,B_1,C1, B_1);
|
C1 = cj.pmadd(A0,B_1,C1);
|
||||||
|
|
||||||
B_0 = blB[2];
|
B_0 = blB[2];
|
||||||
B_1 = blB[3];
|
B_1 = blB[3];
|
||||||
CJMADD(cj,A0,B_0,C2, B_0);
|
C2 = cj.pmadd(A0,B_0,C2);
|
||||||
CJMADD(cj,A0,B_1,C3, B_1);
|
C3 = cj.pmadd(A0,B_1,C3);
|
||||||
|
|
||||||
blB += 4;
|
blB += 4;
|
||||||
}
|
}
|
||||||
res(i, j2 + 0) += alpha * C0;
|
res(i, j2 + 0) += alpha * C0;
|
||||||
@ -2092,7 +2062,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
|||||||
{
|
{
|
||||||
LhsScalar A0 = blA[k];
|
LhsScalar A0 = blA[k];
|
||||||
RhsScalar B_0 = blB[k];
|
RhsScalar B_0 = blB[k];
|
||||||
CJMADD(cj, A0, B_0, C0, B_0);
|
C0 = cj.pmadd(A0, B_0, C0);
|
||||||
}
|
}
|
||||||
res(i, j2) += alpha * C0;
|
res(i, j2) += alpha * C0;
|
||||||
}
|
}
|
||||||
@ -2101,8 +2071,6 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#undef CJMADD
|
|
||||||
|
|
||||||
// pack a block of the lhs
|
// pack a block of the lhs
|
||||||
// The traversal is as follows (mr==4):
|
// The traversal is as follows (mr==4):
|
||||||
// 0 4 8 12 ...
|
// 0 4 8 12 ...
|
||||||
|
Loading…
x
Reference in New Issue
Block a user