mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 01:29:35 +08:00
Fix NVCC+ICC issues.
NVCC does not understand `__forceinline`, so we need to use `inline` when compiling for GPU.

ICC specializes `std::complex` operators for `float` and `double` by default, which cannot be used on device and conflict with Eigen's workaround in CUDA/Complex.h. This can be prevented by defining `_OVERRIDE_COMPLEX_SPECIALIZATION_` before including `<complex>`. Added this define to the tests and to `Eigen/Core`, but this will not work if the user includes `<complex>` before `<Eigen/Core>`.

ICC also seems to generate a duplicate `Map` symbol in `PlainObjectBase`:
```
error: "Map" has already been declared in the current scope
  static ConstMapType Map(const Scalar *data)
```
I tracked this down to `friend class Eigen::Map`. Putting the `friend` statements at the bottom of the class seems to resolve this issue.

Fixes #2180
This commit is contained in:
parent
14487ed14e
commit
d24f9f9b55
@ -40,6 +40,13 @@
|
||||
#pragma GCC optimize ("-fno-ipa-cp-clone")
|
||||
#endif
|
||||
|
||||
// Prevent ICC from specializing std::complex operators that silently fail
|
||||
// on device. This allows us to use our own device-compatible specializations
|
||||
// instead.
|
||||
#if defined(EIGEN_COMP_ICC) && defined(EIGEN_GPU_COMPILE_PHASE) \
|
||||
&& !defined(_OVERRIDE_COMPLEX_SPECIALIZATION_)
|
||||
#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1
|
||||
#endif
|
||||
#include <complex>
|
||||
|
||||
// this include file manages BLAS and MKL related macros
|
||||
|
@ -118,16 +118,8 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
using Base::IsVectorAtCompileTime;
|
||||
using Base::Flags;
|
||||
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
|
||||
friend class Eigen::Map<Derived, Unaligned>;
|
||||
typedef Eigen::Map<Derived, Unaligned> MapType;
|
||||
friend class Eigen::Map<const Derived, Unaligned>;
|
||||
typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
|
||||
#if EIGEN_MAX_ALIGN_BYTES>0
|
||||
// for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
|
||||
friend class Eigen::Map<Derived, AlignedMax>;
|
||||
friend class Eigen::Map<const Derived, AlignedMax>;
|
||||
#endif
|
||||
typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
|
||||
typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
|
||||
template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
|
||||
@ -988,6 +980,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
||||
}
|
||||
|
||||
enum { IsPlainObjectBase = 1 };
|
||||
#endif
|
||||
public:
|
||||
// These apparently need to be down here for nvcc+icc to prevent duplicate
|
||||
// Map symbol.
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
|
||||
friend class Eigen::Map<Derived, Unaligned>;
|
||||
friend class Eigen::Map<const Derived, Unaligned>;
|
||||
#if EIGEN_MAX_ALIGN_BYTES>0
|
||||
// for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
|
||||
friend class Eigen::Map<Derived, AlignedMax>;
|
||||
friend class Eigen::Map<const Derived, AlignedMax>;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -12,9 +12,6 @@
|
||||
#define EIGEN_COMPLEX_CUDA_H
|
||||
|
||||
// clang-format off
|
||||
|
||||
#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE)
|
||||
|
||||
// Many std::complex methods such as operator+, operator-, operator* and
|
||||
// operator/ are not constexpr. Due to this, GCC and older versions of clang do
|
||||
// not treat them as device functions and thus Eigen functors making use of
|
||||
@ -22,6 +19,17 @@
|
||||
// operators and functors for complex types when building for CUDA to enable
|
||||
// their use on-device.
|
||||
|
||||
#if defined(EIGEN_CUDACC) && defined(EIGEN_GPU_COMPILE_PHASE)
|
||||
|
||||
// ICC already specializes std::complex<float> and std::complex<double>
|
||||
// operators, preventing us from making them device functions here.
|
||||
// This will lead to silent runtime errors if the operators are used on device.
|
||||
//
|
||||
// To allow std::complex operator use on device, define _OVERRIDE_COMPLEX_SPECIALIZATION_
|
||||
// prior to first inclusion of <complex>. This prevents ICC from adding
|
||||
// its own specializations, so our custom ones below can be used instead.
|
||||
#if !(defined(EIGEN_COMP_ICC) && defined(_USE_COMPLEX_SPECIALIZATION_))
|
||||
|
||||
// Import Eigen's internal operator specializations.
|
||||
#define EIGEN_USING_STD_COMPLEX_OPERATORS \
|
||||
using Eigen::complex_operator_detail::operator+; \
|
||||
@ -244,6 +252,8 @@ EIGEN_USING_STD_COMPLEX_OPERATORS
|
||||
} // namespace internal
|
||||
} // namespace Eigen
|
||||
|
||||
#endif
|
||||
#endif // !(EIGEN_COMP_ICC && _USE_COMPLEX_SPECIALIZATION_)
|
||||
|
||||
#endif // EIGEN_CUDACC && EIGEN_GPU_COMPILE_PHASE
|
||||
|
||||
#endif // EIGEN_COMPLEX_CUDA_H
|
||||
|
@ -905,7 +905,7 @@
|
||||
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
|
||||
// but GCC is still doing fine with just inline.
|
||||
#ifndef EIGEN_STRONG_INLINE
|
||||
#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
|
||||
#if (EIGEN_COMP_MSVC || EIGEN_COMP_ICC) && !defined(EIGEN_GPUCC)
|
||||
#define EIGEN_STRONG_INLINE __forceinline
|
||||
#else
|
||||
#define EIGEN_STRONG_INLINE inline
|
||||
|
@ -40,6 +40,8 @@
|
||||
// definitions.
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
// Disable ICC's std::complex operator specializations so we can use our own.
|
||||
#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1
|
||||
#include <complex>
|
||||
#include <deque>
|
||||
#include <queue>
|
||||
|
Loading…
x
Reference in New Issue
Block a user