diff --git a/CMakeLists.txt b/CMakeLists.txt index 13f9c8f9d..2c1ae428e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -227,7 +227,7 @@ if(NOT MSVC) option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) if(EIGEN_TEST_NEON) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mfloat-abi=softfp") message(STATUS "Enabling NEON in tests/examples") endif() diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 967a07df5..e48c064ce 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -290,7 +290,8 @@ template EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& template struct protate_impl { - static Packet run(const Packet& a) { return a; } + // Empty so attempts to use this unimplemented path will fail to compile. + // Only specializations of this template should be used. }; /** \internal \returns a packet with the coefficients rotated to the right in little-endian convention, @@ -299,7 +300,6 @@ struct protate_impl */ template EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a) { - EIGEN_STATIC_ASSERT(offset < unpacket_traits::size, ROTATION_BY_ILLEGAL_OFFSET); return offset ? protate_impl::run(a) : a; } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index e9af45f22..8dd1e1370 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -76,12 +76,12 @@ typedef uint32x4_t Packet4ui; template<> struct packet_traits : default_packet_traits { typedef Packet4f type; - typedef Packet2f half; + typedef Packet4f half; // Packet2f intrinsics not implemented yet enum { Vectorizable = 1, AlignedOnScalar = 1, size = 4, - HasHalfPacket=1, + HasHalfPacket=0, // Packet2f intrinsics not implemented yet HasDiv = 1, // FIXME check the Has* @@ -95,12 +95,12 @@ template<> struct packet_traits : default_packet_traits template<> struct packet_traits : default_packet_traits { typedef Packet4i type; - typedef Packet2i half; + typedef Packet4i half; // Packet2i intrinsics not implemented yet enum { Vectorizable = 1, AlignedOnScalar = 1, size=4, - HasHalfPacket=1 + HasHalfPacket=0 // Packet2i intrinsics not implemented yet // FIXME check the Has* }; }; diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 06c7f362e..0fc7289be 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -155,7 +155,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads // In unit tests we do not want to use extra large matrices, // so we reduce the cache size to check the blocking strategy is not flawed #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS - l1 = 4*1024; + l1 = 9*1024; l2 = 32*1024; l3 = 512*1024; #endif @@ -164,7 +164,7 @@ void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads // Perhaps it would make more sense to consider k*n*m?? // Note that for very tiny problem, this function should be bypassed anyway // because we use the coefficient-based implementation for them. - if(std::max(k,std::max(m,n))<48) + if((std::max)(k,(std::max)(m,n))<48) return; typedef typename Traits::ResScalar ResScalar; @@ -800,6 +800,80 @@ protected: conj_helper cj; }; +// helper for the rotating kernel below +template +struct PossiblyRotatingKernelHelper +{ + // default implementation, not rotating + + typedef typename GebpKernel::Traits Traits; + typedef typename Traits::RhsScalar RhsScalar; + typedef typename Traits::RhsPacket RhsPacket; + typedef typename Traits::AccPacket AccPacket; + + const Traits& traits; + PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {} + + + template + void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const + { + traits.loadRhs(from + (Index+4*K)*Traits::RhsProgress, to); + } + + void unrotateResult(AccPacket&, + AccPacket&, + AccPacket&, + AccPacket&) + { + } +}; + +// rotating implementation +template +struct PossiblyRotatingKernelHelper +{ + typedef typename GebpKernel::Traits Traits; + typedef typename Traits::RhsScalar RhsScalar; + typedef typename Traits::RhsPacket RhsPacket; + typedef typename Traits::AccPacket AccPacket; + + const Traits& traits; + PossiblyRotatingKernelHelper(const Traits& t) : traits(t) {} + + template + void loadOrRotateRhs(RhsPacket& to, const RhsScalar* from) const + { + if (Index == 0) { + to = pload(from + 4*K*Traits::RhsProgress); + } else { + EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers"); + to = protate<1>(to); + } + } + + void unrotateResult(AccPacket& res0, + AccPacket& res1, + AccPacket& res2, + AccPacket& res3) + { + PacketBlock resblock; + resblock.packet[0] = res0; + resblock.packet[1] = res1; + resblock.packet[2] = res2; + resblock.packet[3] = res3; + ptranspose(resblock); + resblock.packet[3] = protate<1>(resblock.packet[3]); + resblock.packet[2] = protate<2>(resblock.packet[2]); + resblock.packet[1] = protate<3>(resblock.packet[1]); + ptranspose(resblock); + res0 = resblock.packet[0]; + res1 = resblock.packet[1]; + res2 = resblock.packet[2]; + res3 = resblock.packet[3]; + } +}; + /* optimized GEneral packed Block * packed Panel product kernel * * Mixing type logic: C += A * B @@ -833,6 +907,16 @@ struct gebp_kernel ResPacketSize = Traits::ResPacketSize }; + + static const bool UseRotatingKernel = + EIGEN_ARCH_ARM && + internal::is_same::value && + internal::is_same::value && + internal::is_same::value && + Traits::LhsPacketSize == 4 && + Traits::RhsPacketSize == 4 && + Traits::ResPacketSize == 4; + EIGEN_DONT_INLINE void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha, @@ -866,6 +950,8 @@ void gebp_kernel=3*Traits::LhsProgress) { + PossiblyRotatingKernelHelper possiblyRotatingKernelHelper(traits); + // loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth) for(Index i=0; i(&blB[(0+4*K)*RhsProgress]); \ - } else { \ - EIGEN_ASM_COMMENT("Do not reorder code, we're very tight on registers"); \ - B_0 = protate<1>(B_0); \ - } \ - } else { \ - EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N); \ - } \ - } while (false) -#else -#define EIGEN_GEBP_ONESTEP_LOADRHS(K,N) \ - EIGEN_GEBP_ONESTEP_LOADRHS_NONROTATING(K,N) -#endif - #define EIGEN_GEBP_ONESTEP(K) \ do { \ EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \ @@ -947,19 +1002,19 @@ void gebp_kernel(B_0, blB); \ traits.madd(A0, B_0, C0, T0); \ traits.madd(A1, B_0, C4, T0); \ traits.madd(A2, B_0, C8, B_0); \ - EIGEN_GEBP_ONESTEP_LOADRHS(K, 1); \ + possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ traits.madd(A0, B_0, C1, T0); \ traits.madd(A1, B_0, C5, T0); \ traits.madd(A2, B_0, C9, B_0); \ - EIGEN_GEBP_ONESTEP_LOADRHS(K, 2); \ + possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ traits.madd(A0, B_0, C2, T0); \ traits.madd(A1, B_0, C6, T0); \ traits.madd(A2, B_0, C10, B_0); \ - EIGEN_GEBP_ONESTEP_LOADRHS(K, 3); \ + possiblyRotatingKernelHelper.template loadOrRotateRhs(B_0, blB); \ traits.madd(A0, B_0, C3 , T0); \ traits.madd(A1, B_0, C7, T0); \ traits.madd(A2, B_0, C11, B_0); \ @@ -992,34 +1047,10 @@ void gebp_kernel resblock; \ - resblock.packet[0] = res0; \ - resblock.packet[1] = res1; \ - resblock.packet[2] = res2; \ - resblock.packet[3] = res3; \ - ptranspose(resblock); \ - resblock.packet[3] = protate<1>(resblock.packet[3]); \ - resblock.packet[2] = protate<2>(resblock.packet[2]); \ - resblock.packet[1] = protate<3>(resblock.packet[1]); \ - ptranspose(resblock); \ - res0 = resblock.packet[0]; \ - res1 = resblock.packet[1]; \ - res2 = resblock.packet[2]; \ - res3 = resblock.packet[3]; \ - } while (false) - - EIGEN_GEBP_UNROTATE_RESULT(C0, C1, C2, C3); - EIGEN_GEBP_UNROTATE_RESULT(C4, C5, C6, C7); - EIGEN_GEBP_UNROTATE_RESULT(C8, C9, C10, C11); - } -#endif + possiblyRotatingKernelHelper.unrotateResult(C0, C1, C2, C3); + possiblyRotatingKernelHelper.unrotateResult(C4, C5, C6, C7); + possiblyRotatingKernelHelper.unrotateResult(C8, C9, C10, C11); ResPacket R0, R1, R2; ResPacket alphav = pset1(alpha); diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 5e16b775b..7538a0633 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -93,8 +93,7 @@ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH, - ROTATION_BY_ILLEGAL_OFFSET + STORAGE_LAYOUT_DOES_NOT_MATCH }; }; diff --git a/blas/xerbla.cpp b/blas/xerbla.cpp index 8775b88cd..c373e8699 100644 --- a/blas/xerbla.cpp +++ b/blas/xerbla.cpp @@ -1,7 +1,7 @@ #include -#if (defined __GNUC__) && (!defined __MINGW32__) +#if (defined __GNUC__) && (!defined __MINGW32__) && (!defined __CYGWIN__) #define EIGEN_WEAK_LINKING __attribute__ ((weak)) #else #define EIGEN_WEAK_LINKING diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index b4ab95dbc..f8aec777d 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -502,6 +502,10 @@ macro(ei_set_build_string) set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit) endif() + if(EIGEN_TEST_CXX11) + set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11) + endif() + if(EIGEN_BUILD_STRING_SUFFIX) set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX}) endif() diff --git a/scripts/buildtests.in b/scripts/buildtests.in index 7026373cf..d2fd10276 100755 --- a/scripts/buildtests.in +++ b/scripts/buildtests.in @@ -14,9 +14,9 @@ targets_to_make=`echo "$TESTSLIST" | egrep "$1" | xargs echo` if [ -n "${EIGEN_MAKE_ARGS:+x}" ] then - make $targets_to_make ${EIGEN_MAKE_ARGS} + @CMAKE_MAKE_PROGRAM@ $targets_to_make ${EIGEN_MAKE_ARGS} else - make $targets_to_make + @CMAKE_MAKE_PROGRAM@ $targets_to_make @EIGEN_TEST_BUILD_FLAGS@ fi exit $? diff --git a/test/main.h b/test/main.h index 1f937690c..ecf0c6924 100644 --- a/test/main.h +++ b/test/main.h @@ -42,13 +42,19 @@ #include #include #include +#if __cplusplus >= 201103L +#include +#ifdef EIGEN_USE_THREADS +#include +#endif +#endif // To test that all calls from Eigen code to std::min() and std::max() are // protected by parenthesis against macro expansion, the min()/max() macros // are defined here and any not-parenthesized min/max call will cause a // compiler error. -//#define min(A,B) please_protect_your_min_with_parentheses -//#define max(A,B) please_protect_your_max_with_parentheses +#define min(A,B) please_protect_your_min_with_parentheses +#define max(A,B) please_protect_your_max_with_parentheses #define FORBIDDEN_IDENTIFIER (this_identifier_is_forbidden_to_avoid_clashes) this_identifier_is_forbidden_to_avoid_clashes // B0 is defined in POSIX header termios.h diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 1eb37bf1c..bb2f8b977 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -97,7 +97,7 @@ struct EvalRange { Index i = first; static const int PacketSize = unpacket_traits::size; - if (last - first > PacketSize) { + if (last - first >= PacketSize) { eigen_assert(first % PacketSize == 0); Index lastPacket = last - (last % PacketSize); for (; i < lastPacket; i += PacketSize) { diff --git a/unsupported/Eigen/MPRealSupport b/unsupported/Eigen/MPRealSupport index 8e42965a3..89036886b 100644 --- a/unsupported/Eigen/MPRealSupport +++ b/unsupported/Eigen/MPRealSupport @@ -141,20 +141,32 @@ int main() public: typedef mpfr::mpreal ResScalar; enum { + Vectorizable = false, + LhsPacketSize = 1, + RhsPacketSize = 1, + ResPacketSize = 1, + NumberOfRegisters = 1, nr = 1, mr = 1, LhsProgress = 1, RhsProgress = 1 }; + typedef ResScalar LhsPacket; + typedef ResScalar RhsPacket; + typedef ResScalar ResPacket; + }; - template - struct gebp_kernel + + + template + struct gebp_kernel { typedef mpfr::mpreal mpreal; EIGEN_DONT_INLINE - void operator()(mpreal* res, Index resStride, const mpreal* blockA, const mpreal* blockB, Index rows, Index depth, Index cols, mpreal alpha, + void operator()(const DataMapper& res, const mpreal* blockA, const mpreal* blockB, + Index rows, Index depth, Index cols, const mpreal& alpha, Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0) { if(rows==0 || cols==0 || depth==0) @@ -170,8 +182,6 @@ int main() { for(Index j=0; j()) { mat2(i,j,k) = mat1(i,j,k); } } diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index dddda7dd9..7d6f4e79f 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -57,7 +57,8 @@ #include // Options -#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC. +// FIXME HAVE_INT64_SUPPORT leads to clashes with long int and int64_t on some systems. +//#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC. #define MPREAL_HAVE_MSVC_DEBUGVIEW // Enable Debugger Visualizer for "Debug" builds in MSVC. #define MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Enable extended std::numeric_limits specialization. // Meaning that "digits", "round_style" and similar members are defined as functions, not constants.