mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-23 01:59:38 +08:00
bug #1572: use c++11 atomic instead of volatile if c++11 is available, and disable multi-threaded GEMM on non-x86 without c++11.
This commit is contained in:
parent
add5757488
commit
40797dbea3
@ -8,6 +8,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
|||||||
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ")
|
message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
# Alias Eigen_*_DIR to Eigen3_*_DIR:
|
# Alias Eigen_*_DIR to Eigen3_*_DIR:
|
||||||
|
|
||||||
set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR})
|
set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR})
|
||||||
@ -107,7 +108,7 @@ if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
|
|||||||
option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON)
|
option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
set(CMAKE_INCLUDE_CURRENT_DIR OFF)
|
||||||
|
|
||||||
option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON)
|
option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON)
|
||||||
|
|
||||||
@ -377,7 +378,7 @@ option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tens
|
|||||||
|
|
||||||
set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code")
|
set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code")
|
||||||
|
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
|
||||||
# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR
|
# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR
|
||||||
if(EIGEN_INCLUDE_INSTALL_DIR)
|
if(EIGEN_INCLUDE_INSTALL_DIR)
|
||||||
|
@ -108,7 +108,7 @@ static void run(Index rows, Index cols, Index depth,
|
|||||||
// i.e., we test that info[tid].users equals 0.
|
// i.e., we test that info[tid].users equals 0.
|
||||||
// Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
|
// Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
|
||||||
while(info[tid].users!=0) {}
|
while(info[tid].users!=0) {}
|
||||||
info[tid].users += threads;
|
info[tid].users = threads;
|
||||||
|
|
||||||
pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
|
pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
|
||||||
|
|
||||||
@ -146,7 +146,9 @@ static void run(Index rows, Index cols, Index depth,
|
|||||||
// Release all the sub blocks A'_i of A' for the current thread,
|
// Release all the sub blocks A'_i of A' for the current thread,
|
||||||
// i.e., we simply decrement the number of users by 1
|
// i.e., we simply decrement the number of users by 1
|
||||||
for(Index i=0; i<threads; ++i)
|
for(Index i=0; i<threads; ++i)
|
||||||
|
#if !EIGEN_HAS_CXX11_ATOMIC
|
||||||
#pragma omp atomic
|
#pragma omp atomic
|
||||||
|
#endif
|
||||||
info[i].users -= 1;
|
info[i].users -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,10 @@
|
|||||||
#ifndef EIGEN_PARALLELIZER_H
|
#ifndef EIGEN_PARALLELIZER_H
|
||||||
#define EIGEN_PARALLELIZER_H
|
#define EIGEN_PARALLELIZER_H
|
||||||
|
|
||||||
|
#if EIGEN_HAS_CXX11_ATOMIC
|
||||||
|
#include <atomic>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
@ -75,8 +79,17 @@ template<typename Index> struct GemmParallelInfo
|
|||||||
{
|
{
|
||||||
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
|
||||||
|
|
||||||
|
// volatile is not enough on all architectures (see bug 1572)
|
||||||
|
// to guarantee that when thread A says to thread B that it is
|
||||||
|
// done with packing a block, then all writes have been really
|
||||||
|
// carried out. The C++11 memory model with std::atomic guarantees this.
|
||||||
|
#if EIGEN_HAS_CXX11_ATOMIC
|
||||||
|
std::atomic<Index> sync;
|
||||||
|
std::atomic<int> users;
|
||||||
|
#else
|
||||||
Index volatile sync;
|
Index volatile sync;
|
||||||
int volatile users;
|
int volatile users;
|
||||||
|
#endif
|
||||||
|
|
||||||
Index lhs_start;
|
Index lhs_start;
|
||||||
Index lhs_length;
|
Index lhs_length;
|
||||||
@ -87,7 +100,10 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
|
|||||||
{
|
{
|
||||||
// TODO when EIGEN_USE_BLAS is defined,
|
// TODO when EIGEN_USE_BLAS is defined,
|
||||||
// we should still enable OMP for other scalar types
|
// we should still enable OMP for other scalar types
|
||||||
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
|
// Without C++11, we have to disable GEMM's parallelization on
|
||||||
|
// non-x86 architectures because volatile is not enough there for our purpose.
|
||||||
|
// See bug 1572.
|
||||||
|
#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
|
||||||
// FIXME the transpose variable is only needed to properly split
|
// FIXME the transpose variable is only needed to properly split
|
||||||
// the matrix product when multithreading is enabled. This is a temporary
|
// the matrix product when multithreading is enabled. This is a temporary
|
||||||
// fix to support row-major destination matrices. This whole
|
// fix to support row-major destination matrices. This whole
|
||||||
|
@ -537,7 +537,7 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Does the compiler support type_trais?
|
// Does the compiler support type_traits?
|
||||||
#ifndef EIGEN_HAS_TYPE_TRAITS
|
#ifndef EIGEN_HAS_TYPE_TRAITS
|
||||||
#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700)
|
#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700)
|
||||||
#define EIGEN_HAS_TYPE_TRAITS 1
|
#define EIGEN_HAS_TYPE_TRAITS 1
|
||||||
@ -617,6 +617,16 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef EIGEN_HAS_CXX11_ATOMIC
|
||||||
|
#if EIGEN_MAX_CPP_VER>=11 && \
|
||||||
|
(__has_feature(cxx_atomic) \
|
||||||
|
|| (__cplusplus > 201103L) \
|
||||||
|
|| ((__cplusplus >= 201103L) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700)))
|
||||||
|
#define EIGEN_HAS_CXX11_ATOMIC 1
|
||||||
|
#else
|
||||||
|
#define EIGEN_HAS_CXX11_ATOMIC 0
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_CUDACC) && EIGEN_HAS_CONSTEXPR
|
#if defined(EIGEN_CUDACC) && EIGEN_HAS_CONSTEXPR
|
||||||
// While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules
|
// While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules
|
||||||
|
@ -387,7 +387,6 @@ if(CUDA_FOUND)
|
|||||||
if(EIGEN_TEST_CUDA_CLANG)
|
if(EIGEN_TEST_CUDA_CLANG)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30")
|
||||||
endif()
|
endif()
|
||||||
cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
|
||||||
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
||||||
|
|
||||||
ei_add_test(gpu_basic)
|
ei_add_test(gpu_basic)
|
||||||
@ -416,7 +415,6 @@ if (EIGEN_TEST_HIP)
|
|||||||
|
|
||||||
if (${HIP_PLATFORM} STREQUAL "hcc")
|
if (${HIP_PLATFORM} STREQUAL "hcc")
|
||||||
|
|
||||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
|
||||||
include_directories(${HIP_PATH}/include)
|
include_directories(${HIP_PATH}/include)
|
||||||
|
|
||||||
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user