diff --git a/CMakeLists.txt b/CMakeLists.txt index 0547ee681..dbf0999ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ") endif() + # Alias Eigen_*_DIR to Eigen3_*_DIR: set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR}) @@ -107,7 +108,7 @@ if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows) option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON) endif() -set(CMAKE_INCLUDE_CURRENT_DIR ON) +set(CMAKE_INCLUDE_CURRENT_DIR OFF) option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON) @@ -377,7 +378,7 @@ option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tens set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code") -include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) # Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR if(EIGEN_INCLUDE_INSTALL_DIR) diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index dc47cf7cf..bd7b6ff2a 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -108,7 +108,7 @@ static void run(Index rows, Index cols, Index depth, // i.e., we test that info[tid].users equals 0. // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it. while(info[tid].users!=0) {} - info[tid].users += threads; + info[tid].users = threads; pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length); @@ -146,7 +146,9 @@ static void run(Index rows, Index cols, Index depth, // Release all the sub blocks A'_i of A' for the current thread, // i.e., we simply decrement the number of users by 1 for(Index i=0; i +#endif + namespace Eigen { namespace internal { @@ -75,8 +79,17 @@ template struct GemmParallelInfo { GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {} + // volatile is not enough on all architectures (see bug 1572) + // to guarantee that when thread A says to thread B that it is + // done with packing a block, then all writes have been really + // carried out... C++11 memory model+atomic guarantees this. +#if EIGEN_HAS_CXX11_ATOMIC + std::atomic sync; + std::atomic users; +#else Index volatile sync; int volatile users; +#endif Index lhs_start; Index lhs_length; @@ -87,7 +100,10 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, { // TODO when EIGEN_USE_BLAS is defined, // we should still enable OMP for other scalar types -#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS) + // Without C++11, we have to disable GEMM's parallelization on + // non x86 architectures because there volatile is not enough for our purpose. + // See bug 1572. +#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64)) // FIXME the transpose variable is only needed to properly split // the matrix product when multithreading is enabled. This is a temporary // fix to support row-major destination matrices. This whole diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 46ca0193a..fc4c0815c 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -537,7 +537,7 @@ #endif #endif -// Does the compiler support type_trais? +// Does the compiler support type_traits? #ifndef EIGEN_HAS_TYPE_TRAITS #if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700) #define EIGEN_HAS_TYPE_TRAITS 1 @@ -617,6 +617,16 @@ #endif #endif +#ifndef EIGEN_HAS_CXX11_ATOMIC + #if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_atomic) \ + || (__cplusplus > 201103L) \ + || ((__cplusplus >= 201103L) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700))) + #define EIGEN_HAS_CXX11_ATOMIC 1 + #else + #define EIGEN_HAS_CXX11_ATOMIC 0 + #endif +#endif #if defined(EIGEN_CUDACC) && EIGEN_HAS_CONSTEXPR // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index aacfa22bb..b4730cff0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -387,7 +387,6 @@ if(CUDA_FOUND) if(EIGEN_TEST_CUDA_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30") endif() - cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR}) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") ei_add_test(gpu_basic) @@ -416,7 +415,6 @@ if (EIGEN_TEST_HIP) if (${HIP_PLATFORM} STREQUAL "hcc") - include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${HIP_PATH}/include) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")