From 684d76eba3244d5ae75602b8ec06e81ef74da79b Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 24 Nov 2009 15:12:43 -0500 Subject: [PATCH] add SSE4 support, start with integer multiplication --- CMakeLists.txt | 22 ++++++++++++++----- Eigen/Core | 6 ++++++ Eigen/src/Core/arch/SSE/PacketMath.h | 4 ++++ cmake/EigenTesting.cmake | 32 +++++++++++++++++++--------- 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8932fff2a..33e7584c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,31 +63,43 @@ if(CMAKE_COMPILER_IS_GNUCXX) if(NOT EIGEN_TEST_LIB) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic") - endif(NOT EIGEN_TEST_LIB) + endif() option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) if(EIGEN_TEST_SSE2) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") message("Enabling SSE2 in tests/examples") - endif(EIGEN_TEST_SSE2) + endif() option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF) if(EIGEN_TEST_SSE3) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") message("Enabling SSE3 in tests/examples") - endif(EIGEN_TEST_SSE3) + endif() option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF) if(EIGEN_TEST_SSSE3) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3") message("Enabling SSSE3 in tests/examples") - endif(EIGEN_TEST_SSSE3) + endif() + + option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF) + if(EIGEN_TEST_SSE4_1) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") + message("Enabling SSE4.1 in tests/examples") + endif() + + option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF) + if(EIGEN_TEST_SSE4_2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") + message("Enabling SSE4.2 in tests/examples") + endif() option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF) if(EIGEN_TEST_ALTIVEC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec") message("Enabling AltiVec in tests/examples") - 
endif(EIGEN_TEST_ALTIVEC) + endif() endif(CMAKE_SYSTEM_NAME MATCHES Linux) endif(CMAKE_COMPILER_IS_GNUCXX) diff --git a/Eigen/Core b/Eigen/Core index 2968e36c6..e095aee11 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -67,6 +67,12 @@ #ifdef __SSSE3__ #include <tmmintrin.h> #endif + #ifdef __SSE4_1__ + #include <smmintrin.h> + #endif + #ifdef __SSE4_2__ + #include <nmmintrin.h> + #endif #elif defined __ALTIVEC__ #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_ALTIVEC diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 60ccadc21..69f6979bd 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { +#ifdef __SSE4_1__ + return _mm_mullo_epi32(a,b); +#else // this version is slightly faster than 4 scalar products return ei_vec4i_swizzle1( ei_vec4i_swizzle2( @@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con ei_vec4i_swizzle1(b,1,0,3,2)), 0,2,0,2), 0,2,1,3); +#endif } template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); } diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 42f7454f4..43ae53cc1 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -147,33 +147,45 @@ macro(ei_testing_print_summary) if(EIGEN_TEST_SSE2) message("SSE2: ON") - else(EIGEN_TEST_SSE2) + else() message("SSE2: Using architecture defaults") - endif(EIGEN_TEST_SSE2) + endif() if(EIGEN_TEST_SSE3) message("SSE3: ON") - else(EIGEN_TEST_SSE3) + else() message("SSE3: Using architecture defaults") - endif(EIGEN_TEST_SSE3) + endif() if(EIGEN_TEST_SSSE3) message("SSSE3: ON") - else(EIGEN_TEST_SSSE3) + else() message("SSSE3: Using architecture defaults") - 
endif(EIGEN_TEST_SSSE3) + endif() + + if(EIGEN_TEST_SSE4_1) + message("SSE4.1: ON") + else() + message("SSE4.1: Using architecture defaults") + endif() + + if(EIGEN_TEST_SSE4_2) + message("SSE4.2: ON") + else() + message("SSE4.2: Using architecture defaults") + endif() if(EIGEN_TEST_ALTIVEC) message("Altivec: Using architecture defaults") - else(EIGEN_TEST_ALTIVEC) + else() message("Altivec: Using architecture defaults") - endif(EIGEN_TEST_ALTIVEC) + endif() if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) message("Explicit vec: OFF") - else(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) + else() message("Explicit vec: Using architecture defaults") - endif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) + endif() message("\n${EIGEN_TESTING_SUMMARY}") # message("CXX: ${CMAKE_CXX_COMPILER}")