mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-18 12:23:13 +08:00
add SSE4 support, start with integer multiplication
This commit is contained in:
parent
abdb2a2bd5
commit
684d76eba3
@ -63,31 +63,43 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
|||||||
|
|
||||||
if(NOT EIGEN_TEST_LIB)
|
if(NOT EIGEN_TEST_LIB)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
|
||||||
endif(NOT EIGEN_TEST_LIB)
|
endif()
|
||||||
|
|
||||||
option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
|
option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
|
||||||
if(EIGEN_TEST_SSE2)
|
if(EIGEN_TEST_SSE2)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
|
||||||
message("Enabling SSE2 in tests/examples")
|
message("Enabling SSE2 in tests/examples")
|
||||||
endif(EIGEN_TEST_SSE2)
|
endif()
|
||||||
|
|
||||||
option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
|
option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
|
||||||
if(EIGEN_TEST_SSE3)
|
if(EIGEN_TEST_SSE3)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
|
||||||
message("Enabling SSE3 in tests/examples")
|
message("Enabling SSE3 in tests/examples")
|
||||||
endif(EIGEN_TEST_SSE3)
|
endif()
|
||||||
|
|
||||||
option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
|
option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
|
||||||
if(EIGEN_TEST_SSSE3)
|
if(EIGEN_TEST_SSSE3)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
|
||||||
message("Enabling SSSE3 in tests/examples")
|
message("Enabling SSSE3 in tests/examples")
|
||||||
endif(EIGEN_TEST_SSSE3)
|
endif()
|
||||||
|
|
||||||
|
option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF)
|
||||||
|
if(EIGEN_TEST_SSE4_1)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
|
||||||
|
message("Enabling SSE4.1 in tests/examples")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF)
|
||||||
|
if(EIGEN_TEST_SSE4_2)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
|
||||||
|
message("Enabling SSE4.2 in tests/examples")
|
||||||
|
endif()
|
||||||
|
|
||||||
option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF)
|
option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF)
|
||||||
if(EIGEN_TEST_ALTIVEC)
|
if(EIGEN_TEST_ALTIVEC)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
|
||||||
message("Enabling AltiVec in tests/examples")
|
message("Enabling AltiVec in tests/examples")
|
||||||
endif(EIGEN_TEST_ALTIVEC)
|
endif()
|
||||||
|
|
||||||
endif(CMAKE_SYSTEM_NAME MATCHES Linux)
|
endif(CMAKE_SYSTEM_NAME MATCHES Linux)
|
||||||
endif(CMAKE_COMPILER_IS_GNUCXX)
|
endif(CMAKE_COMPILER_IS_GNUCXX)
|
||||||
|
@ -67,6 +67,12 @@
|
|||||||
#ifdef __SSSE3__
|
#ifdef __SSSE3__
|
||||||
#include <tmmintrin.h>
|
#include <tmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __SSE4_1__
|
||||||
|
#include <smmintrin.h>
|
||||||
|
#endif
|
||||||
|
#ifdef __SSE4_2__
|
||||||
|
#include <nmmintrin.h>
|
||||||
|
#endif
|
||||||
#elif defined __ALTIVEC__
|
#elif defined __ALTIVEC__
|
||||||
#define EIGEN_VECTORIZE
|
#define EIGEN_VECTORIZE
|
||||||
#define EIGEN_VECTORIZE_ALTIVEC
|
#define EIGEN_VECTORIZE_ALTIVEC
|
||||||
|
@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
|
|||||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
|
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||||
{
|
{
|
||||||
|
#ifdef __SSE4_1__
|
||||||
|
return _mm_mullo_epi32(a,b);
|
||||||
|
#else
|
||||||
// this version is slightly faster than 4 scalar products
|
// this version is slightly faster than 4 scalar products
|
||||||
return ei_vec4i_swizzle1(
|
return ei_vec4i_swizzle1(
|
||||||
ei_vec4i_swizzle2(
|
ei_vec4i_swizzle2(
|
||||||
@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
|
|||||||
ei_vec4i_swizzle1(b,1,0,3,2)),
|
ei_vec4i_swizzle1(b,1,0,3,2)),
|
||||||
0,2,0,2),
|
0,2,0,2),
|
||||||
0,2,1,3);
|
0,2,1,3);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
|
||||||
|
@ -147,33 +147,45 @@ macro(ei_testing_print_summary)
|
|||||||
|
|
||||||
if(EIGEN_TEST_SSE2)
|
if(EIGEN_TEST_SSE2)
|
||||||
message("SSE2: ON")
|
message("SSE2: ON")
|
||||||
else(EIGEN_TEST_SSE2)
|
else()
|
||||||
message("SSE2: Using architecture defaults")
|
message("SSE2: Using architecture defaults")
|
||||||
endif(EIGEN_TEST_SSE2)
|
endif()
|
||||||
|
|
||||||
if(EIGEN_TEST_SSE3)
|
if(EIGEN_TEST_SSE3)
|
||||||
message("SSE3: ON")
|
message("SSE3: ON")
|
||||||
else(EIGEN_TEST_SSE3)
|
else()
|
||||||
message("SSE3: Using architecture defaults")
|
message("SSE3: Using architecture defaults")
|
||||||
endif(EIGEN_TEST_SSE3)
|
endif()
|
||||||
|
|
||||||
if(EIGEN_TEST_SSSE3)
|
if(EIGEN_TEST_SSSE3)
|
||||||
message("SSSE3: ON")
|
message("SSSE3: ON")
|
||||||
else(EIGEN_TEST_SSSE3)
|
else()
|
||||||
message("SSSE3: Using architecture defaults")
|
message("SSSE3: Using architecture defaults")
|
||||||
endif(EIGEN_TEST_SSSE3)
|
endif()
|
||||||
|
|
||||||
|
if(EIGEN_TEST_SSE4_1)
|
||||||
|
message("SSE4.1: ON")
|
||||||
|
else()
|
||||||
|
message("SSE4.1: Using architecture defaults")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(EIGEN_TEST_SSE4_2)
|
||||||
|
message("SSE4.2: ON")
|
||||||
|
else()
|
||||||
|
message("SSE4.2: Using architecture defaults")
|
||||||
|
endif()
|
||||||
|
|
||||||
if(EIGEN_TEST_ALTIVEC)
|
if(EIGEN_TEST_ALTIVEC)
|
||||||
message("Altivec: Using architecture defaults")
|
message("Altivec: Using architecture defaults")
|
||||||
else(EIGEN_TEST_ALTIVEC)
|
else()
|
||||||
message("Altivec: Using architecture defaults")
|
message("Altivec: Using architecture defaults")
|
||||||
endif(EIGEN_TEST_ALTIVEC)
|
endif()
|
||||||
|
|
||||||
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
||||||
message("Explicit vec: OFF")
|
message("Explicit vec: OFF")
|
||||||
else(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
else()
|
||||||
message("Explicit vec: Using architecture defaults")
|
message("Explicit vec: Using architecture defaults")
|
||||||
endif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
endif()
|
||||||
|
|
||||||
message("\n${EIGEN_TESTING_SUMMARY}")
|
message("\n${EIGEN_TESTING_SUMMARY}")
|
||||||
# message("CXX: ${CMAKE_CXX_COMPILER}")
|
# message("CXX: ${CMAKE_CXX_COMPILER}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user