From 815fa0dbf63f112f98fe3ac38483e0248caf9eec Mon Sep 17 00:00:00 2001 From: Jonas Adler Date: Wed, 22 Jul 2015 12:29:18 +0200 Subject: [PATCH] Fixed some compiler bugs in NVCC, now compiles with CUDA. (chtz: Manually joined sevaral commits to keep the history clean) --- Eigen/Core | 8 ++- Eigen/src/Core/DenseBase.h | 49 +++++++++++++++--- Eigen/src/Core/DenseStorage.h | 78 ++++++++++++++--------------- Eigen/src/Core/Replicate.h | 17 +------ Eigen/src/Core/Reverse.h | 9 +--- Eigen/src/Core/VectorwiseOp.h | 77 +++++++++++----------------- Eigen/src/Core/util/Macros.h | 9 +++- Eigen/src/Core/util/Memory.h | 6 +-- Eigen/src/Core/util/Meta.h | 4 ++ doc/snippets/compile_snippet.cpp.in | 5 ++ 10 files changed, 139 insertions(+), 123 deletions(-) diff --git a/Eigen/Core b/Eigen/Core index de94b5b75..91713a43e 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -24,9 +24,15 @@ #ifdef EIGEN_INTERNAL_DEBUGGING #undef EIGEN_INTERNAL_DEBUGGING #endif - + // Do not try to vectorize on CUDA! + #ifndef EIGEN_DONT_VECTORIZE #define EIGEN_DONT_VECTORIZE + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif // All functions callable from CUDA code must be qualified with __device__ #define EIGEN_DEVICE_FUNC __host__ __device__ diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index c603b5a6d..dc22a128e 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -491,9 +491,29 @@ template class DenseBase typedef VectorwiseOp ColwiseReturnType; typedef const VectorwiseOp ConstColwiseReturnType; - EIGEN_DEVICE_FUNC ConstRowwiseReturnType rowwise() const; + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_rowwise.cpp + * Output: \verbinclude MatrixBase_rowwise.out + * + * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const { + return ConstRowwiseReturnType(derived()); + } EIGEN_DEVICE_FUNC RowwiseReturnType rowwise(); - EIGEN_DEVICE_FUNC ConstColwiseReturnType colwise() const; + + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_colwise.cpp + * Output: \verbinclude MatrixBase_colwise.out + * + * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const { + return ConstColwiseReturnType(derived()); + } EIGEN_DEVICE_FUNC ColwiseReturnType colwise(); typedef CwiseNullaryOp,PlainObject> RandomReturnType; @@ -519,14 +539,31 @@ template class DenseBase template EIGEN_DEVICE_FUNC const Replicate replicate() const; + /** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate_int_int.cpp + * Output: \verbinclude MatrixBase_replicate_int_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ + //Code moved here due to a CUDA compiler bug EIGEN_DEVICE_FUNC - const Replicate replicate(Index rowFacor,Index colFactor) const; + const Replicate replicate(Index rowFactor, Index colFactor) const + { + return Replicate(derived(), rowFactor, colFactor); + } typedef Reverse ReverseReturnType; typedef const Reverse ConstReverseReturnType; - ReverseReturnType reverse(); - ConstReverseReturnType reverse() const; - void reverseInPlace(); + EIGEN_DEVICE_FUNC ReverseReturnType reverse(); + /** This is the const version of reverse(). */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const + { + return ConstReverseReturnType(derived()); + } + EIGEN_DEVICE_FUNC void reverseInPlace(); #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase # include "../plugins/BlockMethods.h" diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 80c4c6e8e..5eb434c6d 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -270,10 +270,10 @@ template class DenseStorage class DenseStorage class DenseStorage class DenseStorage class DenseStorage class DenseStorage class DenseStorage(size)), m_rows(rows), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); } - DenseStorage(const DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(internal::conditional_aligned_new_auto(other.m_rows*other.m_cols)) , m_rows(other.m_rows) , m_cols(other.m_cols) { internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data); } - DenseStorage& operator=(const DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { if (this != &other) { @@ -405,8 +405,8 @@ template class DenseStorage(m_data, m_rows*m_cols); } - void swap(DenseStorage& other) + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} @@ -416,7 +416,7 @@ template class DenseStorage class DenseStorage(size)), m_cols(cols) + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); EIGEN_UNUSED_VARIABLE(rows); } - DenseStorage(const DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(internal::conditional_aligned_new_auto(_Rows*other.m_cols)) , m_cols(other.m_cols) { internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); } - DenseStorage& operator=(const DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { if (this != &other) { @@ -481,16 +481,16 @@ template class DenseStorage(m_data, _Rows*m_cols); } - void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} - void conservativeResize(Index size, Index, Index cols) + EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, _Rows*m_cols); m_cols = cols; } - EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) { if(size != _Rows*m_cols) { @@ -515,19 +515,19 @@ template class DenseStorage(size)), m_rows(rows) + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); EIGEN_UNUSED_VARIABLE(cols); } - DenseStorage(const DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(internal::conditional_aligned_new_auto(other.m_rows*_Cols)) , m_rows(other.m_rows) { internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data); } - DenseStorage& operator=(const DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { if (this != &other) { @@ -554,8 +554,8 @@ template class DenseStorage(m_data, _Cols*m_rows); } - void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} void conservativeResize(Index size, Index rows, Index) @@ -563,7 +563,7 @@ template class DenseStorage(m_data, size, m_rows*_Cols); m_rows = rows; } - EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) { if(size != m_rows*_Cols) { diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index 518c52e57..bec598310 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -114,27 +114,12 @@ template class Replicate */ template template -inline const Replicate +const Replicate DenseBase::replicate() const { return Replicate(derived()); } -/** - * \return an expression of the replication of \c *this - * - * Example: \include MatrixBase_replicate_int_int.cpp - * Output: \verbinclude MatrixBase_replicate_int_int.out - * - * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate - */ -template -inline const Replicate -DenseBase::replicate(Index rowFactor,Index colFactor) const -{ - return Replicate(derived(),rowFactor,colFactor); -} - /** * \return an expression of the replication of each column (or row) of \c *this * diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index ef301e66d..8e7f6e927 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -120,13 +120,8 @@ DenseBase::reverse() return ReverseReturnType(derived()); } -/** This is the const version of reverse(). */ -template -inline const typename DenseBase::ConstReverseReturnType -DenseBase::reverse() const -{ - return ConstReverseReturnType(derived()); -} + +//reverse const overload moved DenseBase.h due to a CUDA compiler bug /** This is the "in place" version of reverse: it reverses \c *this. * diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 0cc5eff16..fd0ea412d 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -82,7 +82,7 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr class VectorwiseOp }; enum { - IsVertical = (Direction==Vertical) ? 1 : 0, - IsHorizontal = (Direction==Horizontal) ? 1 : 0 + isVertical = (Direction==Vertical) ? 1 : 0, + isHorizontal = (Direction==Horizontal) ? 1 : 0 }; protected: /** \internal * \returns the i-th subvector according to the \c Direction */ - typedef typename internal::conditional::type SubVector; EIGEN_DEVICE_FUNC @@ -206,12 +206,12 @@ template class VectorwiseOp * \returns the number of subvectors in the direction \c Direction */ EIGEN_DEVICE_FUNC Index subVectors() const - { return Direction==Vertical?m_matrix.cols():m_matrix.rows(); } + { return isVertical?m_matrix.cols():m_matrix.rows(); } template struct ExtendedType { typedef Replicate Type; + isVertical ? 1 : ExpressionType::RowsAtCompileTime, + isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type; }; /** \internal @@ -221,20 +221,20 @@ template class VectorwiseOp typename ExtendedType::Type extendedTo(const DenseBase& other) const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1), + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1), YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED) - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1), + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1), YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED) return typename ExtendedType::Type (other.derived(), - Direction==Vertical ? 1 : m_matrix.rows(), - Direction==Horizontal ? 1 : m_matrix.cols()); + isVertical ? 1 : m_matrix.rows(), + isHorizontal ? 1 : m_matrix.cols()); } template struct OppositeExtendedType { typedef Replicate Type; + isHorizontal ? 1 : ExpressionType::RowsAtCompileTime, + isVertical ? 1 : ExpressionType::ColsAtCompileTime> Type; }; /** \internal @@ -244,18 +244,17 @@ template class VectorwiseOp typename OppositeExtendedType::Type extendedToOpposite(const DenseBase& other) const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1), + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1), YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED) - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1), + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1), YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED) return typename OppositeExtendedType::Type (other.derived(), - Direction==Horizontal ? 1 : m_matrix.rows(), - Direction==Vertical ? 1 : m_matrix.cols()); + isHorizontal ? 1 : m_matrix.rows(), + isVertical ? 1 : m_matrix.cols()); } public: - EIGEN_DEVICE_FUNC explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {} @@ -271,8 +270,8 @@ template class VectorwiseOp * \sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise() */ template - const typename ReduxReturnType::Type EIGEN_DEVICE_FUNC + const typename ReduxReturnType::Type redux(const BinaryOp& func = BinaryOp()) const { return typename ReduxReturnType::Type(_expression(), internal::member_redux(func)); } @@ -447,7 +446,7 @@ template class VectorwiseOp const ReverseReturnType reverse() const { return ReverseReturnType( _expression() ); } - typedef Replicate ReplicateReturnType; + typedef Replicate ReplicateReturnType; EIGEN_DEVICE_FUNC const ReplicateReturnType replicate(Index factor) const; @@ -460,12 +459,13 @@ template class VectorwiseOp * \sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate */ // NOTE implemented here because of sunstudio's compilation errors - template const Replicate + // isVertical*Factor+isHorizontal instead of (isVertical?Factor:1) to handle CUDA bug with ternary operator + template const Replicate EIGEN_DEVICE_FUNC replicate(Index factor = Factor) const { - return Replicate - (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1); + return Replicate + (_expression(),isVertical?factor:1,isHorizontal?factor:1); } /////////// Artithmetic operators /////////// @@ -556,6 +556,7 @@ template class VectorwiseOp CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> + EIGEN_DEVICE_FUNC operator*(const DenseBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) @@ -637,19 +638,8 @@ template class VectorwiseOp ExpressionTypeNested m_matrix; }; -/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations - * - * Example: \include MatrixBase_colwise.cpp - * Output: \verbinclude MatrixBase_colwise.out - * - * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting - */ -template -inline const typename DenseBase::ConstColwiseReturnType -DenseBase::colwise() const -{ - return ConstColwiseReturnType(derived()); -} +//const colwise moved to DenseBase.h due to CUDA compiler bug + /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations * @@ -662,19 +652,8 @@ DenseBase::colwise() return ColwiseReturnType(derived()); } -/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations - * - * Example: \include MatrixBase_rowwise.cpp - * Output: \verbinclude MatrixBase_rowwise.out - * - * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting - */ -template -inline const typename DenseBase::ConstRowwiseReturnType -DenseBase::rowwise() const -{ - return ConstRowwiseReturnType(derived()); -} +//const rowwise moved to DenseBase.h due to CUDA compiler bug + /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations * diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index d187257a6..b90c88ed4 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -760,8 +760,13 @@ namespace Eigen { # define EIGEN_TRY try # define EIGEN_CATCH(X) catch (X) #else -# define EIGEN_THROW_X(X) std::abort() -# define EIGEN_THROW std::abort() +# ifdef __CUDA_ARCH__ +# define EIGEN_THROW_X(X) asm("trap;") return {} +# define EIGEN_THROW asm("trap;"); return {} +# else +# define EIGEN_THROW_X(X) std::abort() +# define EIGEN_THROW std::abort() +# endif # define EIGEN_TRY if (true) # define EIGEN_CATCH(X) else #endif diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 62f329984..73287d6ca 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -559,18 +559,18 @@ inline Index first_multiple(Index size, Index base) // use memcpy on trivial types, i.e., on types that does not require an initialization ctor. template struct smart_copy_helper; -template void smart_copy(const T* start, const T* end, T* target) +template EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target) { smart_copy_helper::RequireInitialization>::run(start, end, target); } template struct smart_copy_helper { - static inline void run(const T* start, const T* end, T* target) + EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); } }; template struct smart_copy_helper { - static inline void run(const T* start, const T* end, T* target) + EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { std::copy(start, end, target); } }; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 974f11516..7c8932511 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -117,6 +117,10 @@ template struct enable_if { typedef T type; }; #if defined(__CUDA_ARCH__) +#if !defined(__FLT_EPSILON__) +#define __FLT_EPSILON__ FLT_EPSILON +#define __DBL_EPSILON__ DBL_EPSILON +#endif namespace device { diff --git a/doc/snippets/compile_snippet.cpp.in b/doc/snippets/compile_snippet.cpp.in index 82ae89162..fdae39bcf 100644 --- a/doc/snippets/compile_snippet.cpp.in +++ b/doc/snippets/compile_snippet.cpp.in @@ -1,6 +1,11 @@ #include #include +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795 +#endif + + using namespace Eigen; using namespace std;