diff --git a/CMakeLists.txt b/CMakeLists.txt index ed1cba4f9..53d0ef4bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,7 +176,7 @@ if(NOT MSVC) ei_add_cxx_compiler_flag("-Wall") ei_add_cxx_compiler_flag("-Wextra") #ei_add_cxx_compiler_flag("-Weverything") # clang - + ei_add_cxx_compiler_flag("-Wundef") ei_add_cxx_compiler_flag("-Wcast-align") ei_add_cxx_compiler_flag("-Wchar-subscripts") @@ -191,29 +191,29 @@ if(NOT MSVC) ei_add_cxx_compiler_flag("-Wc++11-extensions") ei_add_cxx_compiler_flag("-Wdouble-promotion") # ei_add_cxx_compiler_flag("-Wconversion") - + # -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6 # if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0")) if(NOT CMAKE_COMPILER_IS_GNUCXX) ei_add_cxx_compiler_flag("-Wshadow") endif() - + ei_add_cxx_compiler_flag("-Wno-psabi") ei_add_cxx_compiler_flag("-Wno-variadic-macros") ei_add_cxx_compiler_flag("-Wno-long-long") - + ei_add_cxx_compiler_flag("-fno-check-new") ei_add_cxx_compiler_flag("-fno-common") ei_add_cxx_compiler_flag("-fstrict-aliasing") ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor - - + + # The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails # Moreover we should not set both -strict-ansi and -ansi check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI) ei_add_cxx_compiler_flag("-Qunused-arguments") # disable clang warning: argument unused during compilation: '-ansi' - + if(COMPILER_SUPPORT_STRICTANSI) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -strict-ansi") else() @@ -224,7 +224,7 @@ if(NOT MSVC) ei_add_cxx_compiler_flag("-pie") ei_add_cxx_compiler_flag("-fPIE") endif() - + set(CMAKE_REQUIRED_FLAGS "") option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) @@ -398,6 +398,7 @@ if(EIGEN_TEST_NO_EXCEPTIONS) message(STATUS "Disabling exceptions in tests/examples") endif() +set(EIGEN_CUDA_CXX_FLAGS "" CACHE STRING "Additional flags to pass to the cuda compiler.") set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code") include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) @@ -600,11 +601,11 @@ if (NOT CMAKE_VERSION VERSION_LESS 3.0) else (NOT CMAKE_VERSION VERSION_LESS 3.0) # Fallback to legacy Eigen3Config.cmake without the imported target - + # If CMakePackageConfigHelpers module is available (CMake >= 2.8.8) - # create a relocatable Config file, otherwise leave the hardcoded paths + # create a relocatable Config file, otherwise leave the hardcoded paths include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH) - + if(CPCH_PATH) configure_package_config_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in @@ -613,7 +614,7 @@ else (NOT CMAKE_VERSION VERSION_LESS 3.0) INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components ) - else() + else() # The PACKAGE_* variables are defined by the configure_package_config_file # but without it we define them manually to the hardcoded paths set(PACKAGE_INIT "") diff --git a/Eigen/Core b/Eigen/Core index 524c2f51d..2ae6f0824 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -123,7 +123,7 @@ #endif #endif -#ifndef EIGEN_DONT_VECTORIZE +#if !defined(EIGEN_DONT_VECTORIZE) && !defined(EIGEN_CUDACC) #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 15ccf24f1..cc42c2549 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -44,7 +44,7 @@ namespace internal { * decomposition to determine whether a system of equations has a solution. * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. - * + * * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT */ template class LDLT @@ -558,7 +558,7 @@ LDLT& LDLT::rankUpdate(const MatrixBase template -void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +EIGEN_DEVICE_FUNC void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { eigen_assert(rhs.rows() == rows()); // dst = P b diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index e1624d21b..27b8ac4a9 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -475,7 +475,7 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c #ifndef EIGEN_PARSED_BY_DOXYGEN template template -void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +EIGEN_DEVICE_FUNC void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { dst = rhs; solveInPlace(dst); diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 53806ba33..87619f4e3 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -16,7 +16,7 @@ namespace Eigen { template template -EIGEN_STRONG_INLINE Derived& DenseBase +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase ::lazyAssign(const DenseBase& other) { enum{ @@ -29,7 +29,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase eigen_assert(rows() == other.rows() && cols() == other.cols()); internal::call_assignment_no_alias(derived(),other.derived()); - + return derived(); } diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index dbe435d86..1f366616c 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -17,7 +17,7 @@ namespace Eigen { // This implementation is based on Assign.h namespace internal { - + /*************************************************************************** * Part 1 : the logic deciding a strategy for traversal and unrolling * ***************************************************************************/ @@ -29,12 +29,12 @@ struct copy_using_evaluator_traits { typedef typename DstEvaluator::XprType Dst; typedef typename Dst::Scalar DstScalar; - + enum { DstFlags = DstEvaluator::Flags, SrcFlags = SrcEvaluator::Flags }; - + public: enum { DstAlignment = DstEvaluator::Alignment, @@ -135,7 +135,7 @@ public: ? int(CompleteUnrolling) : int(NoUnrolling) ) : int(Traversal) == int(LinearTraversal) - ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) #if EIGEN_UNALIGNED_VECTORIZE : int(Traversal) == int(SliceVectorizedTraversal) @@ -195,7 +195,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling // FIXME: this is not very clean, perhaps this information should be provided by the kernel? typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; - + enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime @@ -261,7 +261,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; typedef typename Kernel::PacketType PacketType; - + enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime, @@ -426,7 +426,7 @@ struct dense_assignment_loop::size, alignedSize = (size/packetSize)*packetSize }; @@ -599,14 +599,14 @@ protected: typedef typename DstEvaluatorTypeT::XprType DstXprType; typedef typename SrcEvaluatorTypeT::XprType SrcXprType; public: - + typedef DstEvaluatorTypeT DstEvaluatorType; typedef SrcEvaluatorTypeT SrcEvaluatorType; typedef typename DstEvaluatorType::Scalar Scalar; typedef copy_using_evaluator_traits AssignmentTraits; typedef typename AssignmentTraits::PacketType PacketType; - - + + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) { @@ -614,58 +614,58 @@ public: AssignmentTraits::debug(); #endif } - + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } - + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } - + /// Assign src(row,col) to dst(row,col) through the assignment functor. EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) { m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); } - + /// \sa assignCoeff(Index,Index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) { m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); } - + /// \sa assignCoeff(Index,Index) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) { - Index row = rowIndexByOuterInner(outer, inner); - Index col = colIndexByOuterInner(outer, inner); + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); assignCoeff(row, col); } - - + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) { m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); } - + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) { m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); } - + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) { - Index row = rowIndexByOuterInner(outer, inner); + Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); assignPacket(row, col); } - + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; @@ -688,7 +688,7 @@ public: { return m_dstExpr.data(); } - + protected: DstEvaluatorType& m_dst; const SrcEvaluatorType& m_src; @@ -734,7 +734,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType resize_if_allowed(dst, src, func); DstEvaluatorType dstEvaluator(dst); - + typedef generic_dense_assignment_kernel Kernel; Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); @@ -762,7 +762,7 @@ struct EigenBase2EigenBase {}; template struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; template<> struct AssignmentKind { typedef Dense2Dense Kind; }; - + // This is the main assignment class template< typename DstXprType, typename SrcXprType, typename Functor, typename Kind = typename AssignmentKind< typename evaluator_traits::Shape , typename evaluator_traits::Shape >::Kind, @@ -787,7 +787,7 @@ void call_assignment(const Dst& dst, const Src& src) { call_assignment(dst, src, internal::assign_op()); } - + // Deal with "assume-aliasing" template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -827,12 +827,12 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) typedef typename internal::conditional, Dst>::type ActualDstTypeCleaned; typedef typename internal::conditional, Dst&>::type ActualDstType; ActualDstType actualDst(dst); - + // TODO check whether this is the right place to perform these checks: EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); - + Assignment::run(actualDst, src, func); } template @@ -869,13 +869,12 @@ template void check_for_aliasing(const Dst &dst, con template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> struct Assignment { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { #ifndef EIGEN_NO_DEBUG internal::check_for_aliasing(dst, src); #endif - + call_dense_assignment_loop(dst, src, func); } }; @@ -887,8 +886,7 @@ struct Assignment template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> struct Assignment { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { Index dstRows = src.rows(); Index dstCols = src.cols(); diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index d218e9814..cb06caaa0 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -11,7 +11,7 @@ #ifndef EIGEN_COMMAINITIALIZER_H #define EIGEN_COMMAINITIALIZER_H -namespace Eigen { +namespace Eigen { /** \class CommaInitializer * \ingroup Core_Module @@ -44,7 +44,7 @@ struct CommaInitializer m_xpr.block(0, 0, other.rows(), other.cols()) = other; } - /* Copy/Move constructor which transfers ownership. This is crucial in + /* Copy/Move constructor which transfers ownership. This is crucial in * absence of return value optimization to avoid assertions during destruction. */ // FIXME in C++11 mode this could be replaced by a proper RValue constructor EIGEN_DEVICE_FUNC @@ -135,13 +135,13 @@ struct CommaInitializer * * Example: \include MatrixBase_set.cpp * Output: \verbinclude MatrixBase_set.out - * + * * \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order. * * \sa CommaInitializer::finished(), class CommaInitializer */ template -inline CommaInitializer DenseBase::operator<< (const Scalar& s) +EIGEN_DEVICE_FUNC inline CommaInitializer DenseBase::operator<< (const Scalar& s) { return CommaInitializer(*static_cast(this), s); } @@ -149,7 +149,7 @@ inline CommaInitializer DenseBase::operator<< (const Scalar& s /** \sa operator<<(const Scalar&) */ template template -inline CommaInitializer +EIGEN_DEVICE_FUNC inline CommaInitializer DenseBase::operator<<(const DenseBase& other) { return CommaInitializer(*static_cast(this), other); diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index a36765e39..e2f4980ec 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -74,7 +74,7 @@ class CwiseBinaryOpImpl; * \sa MatrixBase::binaryExpr(const MatrixBase &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp */ template -class CwiseBinaryOp : +class CwiseBinaryOp : public CwiseBinaryOpImpl< BinaryOp, LhsType, RhsType, typename internal::cwise_promote_storage_type::StorageKind, @@ -83,7 +83,7 @@ class CwiseBinaryOp : internal::no_assignment_operator { public: - + typedef typename internal::remove_all::type Functor; typedef typename internal::remove_all::type Lhs; typedef typename internal::remove_all::type Rhs; @@ -158,7 +158,7 @@ public: */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op()); @@ -171,7 +171,7 @@ MatrixBase::operator-=(const MatrixBase &other) */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { call_assignment(derived(), other.derived(), internal::add_assign_op()); diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 006b9f444..0c157ae0a 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -126,12 +126,12 @@ DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f * * Here is an example with C++11 random generators: \include random_cpp11.cpp * Output: \verbinclude random_cpp11.out - * + * * \sa class CwiseNullaryOp */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -170,7 +170,7 @@ DenseBase::NullaryExpr(const CustomNullaryOp& func) * \sa class CwiseNullaryOp */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Constant(Index rows, Index cols, const Scalar& value) { return DenseBase::NullaryExpr(rows, cols, internal::scalar_constant_op(value)); diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h index afcaf3575..387eec980 100644 --- a/Eigen/src/Core/Diagonal.h +++ b/Eigen/src/Core/Diagonal.h @@ -11,7 +11,7 @@ #ifndef EIGEN_DIAGONAL_H #define EIGEN_DIAGONAL_H -namespace Eigen { +namespace Eigen { /** \class Diagonal * \ingroup Core_Module @@ -149,8 +149,8 @@ template class Diagonal } EIGEN_DEVICE_FUNC - inline const typename internal::remove_all::type& - nestedExpression() const + inline const typename internal::remove_all::type& + nestedExpression() const { return m_matrix; } @@ -187,7 +187,7 @@ template class Diagonal * * \sa class Diagonal */ template -inline typename MatrixBase::DiagonalReturnType +EIGEN_DEVICE_FUNC inline typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { return DiagonalReturnType(derived()); @@ -195,7 +195,7 @@ MatrixBase::diagonal() /** This is the const version of diagonal(). */ template -inline typename MatrixBase::ConstDiagonalReturnType +EIGEN_DEVICE_FUNC inline typename MatrixBase::ConstDiagonalReturnType MatrixBase::diagonal() const { return ConstDiagonalReturnType(derived()); @@ -213,7 +213,7 @@ MatrixBase::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template -inline typename MatrixBase::DiagonalDynamicIndexReturnType +EIGEN_DEVICE_FUNC inline typename MatrixBase::DiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) { return DiagonalDynamicIndexReturnType(derived(), index); @@ -221,7 +221,7 @@ MatrixBase::diagonal(Index index) /** This is the const version of diagonal(Index). */ template -inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType +EIGEN_DEVICE_FUNC inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) const { return ConstDiagonalDynamicIndexReturnType(derived(), index); @@ -240,7 +240,7 @@ MatrixBase::diagonal(Index index) const * \sa MatrixBase::diagonal(), class Diagonal */ template template -inline typename MatrixBase::template DiagonalIndexReturnType::Type +EIGEN_DEVICE_FUNC inline typename MatrixBase::template DiagonalIndexReturnType::Type MatrixBase::diagonal() { return typename DiagonalIndexReturnType::Type(derived()); @@ -249,7 +249,7 @@ MatrixBase::diagonal() /** This is the const version of diagonal(). */ template template -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +EIGEN_DEVICE_FUNC inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type MatrixBase::diagonal() const { return typename ConstDiagonalIndexReturnType::Type(derived()); diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index ecfdce8ef..4c80686d9 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -11,7 +11,7 @@ #ifndef EIGEN_DIAGONALMATRIX_H #define EIGEN_DIAGONALMATRIX_H -namespace Eigen { +namespace Eigen { #ifndef EIGEN_PARSED_BY_DOXYGEN template @@ -44,7 +44,7 @@ class DiagonalBase : public EigenBase EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } EIGEN_DEVICE_FUNC @@ -70,7 +70,7 @@ class DiagonalBase : public EigenBase { return InverseReturnType(diagonal().cwiseInverse()); } - + EIGEN_DEVICE_FUNC inline const DiagonalWrapper operator*(const Scalar& scalar) const @@ -273,7 +273,7 @@ class DiagonalWrapper * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() **/ template -inline const DiagonalWrapper +EIGEN_DEVICE_FUNC inline const DiagonalWrapper MatrixBase::asDiagonal() const { return DiagonalWrapper(derived()); @@ -318,20 +318,20 @@ template<> struct AssignmentKind { typedef Diagonal2De template< typename DstXprType, typename SrcXprType, typename Functor> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { Index dstRows = src.rows(); Index dstCols = src.cols(); if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) dst.resize(dstRows, dstCols); - + dst.setZero(); dst.diagonal() = src.diagonal(); } - + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 24d7bb80d..03b6bff16 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -10,7 +10,7 @@ #ifndef EIGEN_DOT_H #define EIGEN_DOT_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -78,7 +78,7 @@ MatrixBase::dot(const MatrixBase& other) const typedef internal::scalar_conj_product_op func; EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar); #endif - + eigen_assert(size() == other.size()); return internal::dot_nocheck::run(*this, other); @@ -93,7 +93,7 @@ MatrixBase::dot(const MatrixBase& other) const * \sa dot(), norm(), lpNorm() */ template -EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const { return numext::real((*this).cwiseAbs2().sum()); } @@ -105,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala * \sa lpNorm(), dot(), squaredNorm() */ template -EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::norm() const +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::norm() const { return numext::sqrt(squaredNorm()); } @@ -120,7 +120,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala * \sa norm(), normalize() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::normalized() const { typedef typename internal::nested_eval::type _Nested; @@ -142,7 +142,7 @@ MatrixBase::normalized() const * \sa norm(), normalized() */ template -EIGEN_STRONG_INLINE void MatrixBase::normalize() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -163,7 +163,7 @@ EIGEN_STRONG_INLINE void MatrixBase::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::stableNormalized() const { typedef typename internal::nested_eval::type _Nested; @@ -188,7 +188,7 @@ MatrixBase::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template -EIGEN_STRONG_INLINE void MatrixBase::stableNormalize() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); @@ -260,9 +260,9 @@ struct lpNorm_selector template template #ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NumTraits::Scalar>::Real +EIGEN_DEVICE_FUNC inline typename NumTraits::Scalar>::Real #else -MatrixBase::RealScalar +EIGEN_DEVICE_FUNC MatrixBase::RealScalar #endif MatrixBase::lpNorm() const { diff --git a/Eigen/src/Core/Fuzzy.h b/Eigen/src/Core/Fuzzy.h index 3e403a09d..d848dae04 100644 --- a/Eigen/src/Core/Fuzzy.h +++ b/Eigen/src/Core/Fuzzy.h @@ -11,7 +11,7 @@ #ifndef EIGEN_FUZZY_H #define EIGEN_FUZZY_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -100,7 +100,7 @@ struct isMuchSmallerThan_scalar_selector */ template template -bool DenseBase::isApprox( +EIGEN_DEVICE_FUNC bool DenseBase::isApprox( const DenseBase& other, const RealScalar& prec ) const @@ -122,7 +122,7 @@ bool DenseBase::isApprox( * \sa isApprox(), isMuchSmallerThan(const DenseBase&, RealScalar) const */ template -bool DenseBase::isMuchSmallerThan( +EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan( const typename NumTraits::Real& other, const RealScalar& prec ) const @@ -142,7 +142,7 @@ bool DenseBase::isMuchSmallerThan( */ template template -bool DenseBase::isMuchSmallerThan( +EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan( const DenseBase& other, const RealScalar& prec ) const diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 6f0cc80e9..588356306 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -207,12 +207,12 @@ template<> struct gemv_dense_selector typedef typename Rhs::Scalar RhsScalar; typedef typename Dest::Scalar ResScalar; typedef typename Dest::RealScalar RealScalar; - + typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); @@ -300,7 +300,7 @@ template<> struct gemv_dense_selector typedef typename Lhs::Scalar LhsScalar; typedef typename Rhs::Scalar RhsScalar; typedef typename Dest::Scalar ResScalar; - + typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; @@ -386,7 +386,7 @@ template<> struct gemv_dense_selector */ template template -inline const Product +EIGEN_DEVICE_FUNC inline const Product MatrixBase::operator*(const MatrixBase &other) const { // A note regarding the function declaration: In MSVC, this function will sometimes @@ -428,7 +428,7 @@ MatrixBase::operator*(const MatrixBase &other) const */ template template -const Product +EIGEN_DEVICE_FUNC const Product MatrixBase::lazyProduct(const MatrixBase &other) const { enum { diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 4cf4120af..a6878bb5a 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -237,7 +237,7 @@ ploaddup(const typename unpacket_traits::type* from) { return *from; } * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]} * Currently, this function is only used in matrix products. - * For packet-size smaller or equal to 4, this function is equivalent to pload1 + * For packet-size smaller or equal to 4, this function is equivalent to pload1 */ template EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits::type* from) @@ -359,77 +359,77 @@ template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet ***************************/ /** \internal \returns the sine of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) { using std::sin; return sin(a); } /** \internal \returns the cosine of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) { using std::cos; return cos(a); } /** \internal \returns the tan of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) { using std::tan; return tan(a); } /** \internal \returns the arc sine of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) { using std::asin; return asin(a); } /** \internal \returns the arc cosine of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) { using std::acos; return acos(a); } /** \internal \returns the arc tangent of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) { using std::atan; return atan(a); } /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) { using std::sinh; return sinh(a); } /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); } /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } /** \internal \returns the exp of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } /** \internal \returns the log of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } /** \internal \returns the log1p of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) { return numext::log1p(a); } /** \internal \returns the log10 of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) { using std::log10; return log10(a); } /** \internal \returns the square-root of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) { return pdiv(pset1(1), psqrt(a)); } /** \internal \returns the rounded value of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pround(const Packet& a) { using numext::round; return round(a); } /** \internal \returns the floor of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } /** \internal \returns the ceil of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } /*************************************************************************** @@ -494,14 +494,14 @@ struct palign_impl /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements * of \a first and \a Offset first elements of \a second. - * + * * This function is currently only used to optimize matrix-vector products on unligned matrices. * It takes 2 packets that represent a contiguous memory array, and returns a packet starting * at the position \a Offset. For instance, for packets of 4 elements, we have: * Input: * - first = {f0,f1,f2,f3} * - second = {s0,s1,s2,s3} - * Output: + * Output: * - if Offset==0 then {f0,f1,f2,f3} * - if Offset==1 then {f1,f2,f3,s0} * - if Offset==2 then {f2,f3,s0,s1} diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index 13adf070e..1fab7a5b6 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -99,7 +99,7 @@ template class NestByValue /** \returns an expression of the temporary version of *this. */ template -inline const NestByValue +EIGEN_DEVICE_FUNC inline const NestByValue DenseBase::nestByValue() const { return NestByValue(derived()); diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index bce1310c9..efe48d59a 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -14,7 +14,7 @@ #define EIGEN_PRODUCTEVALUATORS_H namespace Eigen { - + namespace internal { /** \internal @@ -22,19 +22,19 @@ namespace internal { * Since products require special treatments to handle all possible cases, * we simply deffer the evaluation logic to a product_evaluator class * which offers more partial specialization possibilities. - * + * * \sa class product_evaluator */ template -struct evaluator > +struct evaluator > : public product_evaluator > { typedef Product XprType; typedef product_evaluator Base; - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} }; - + // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" // TODO we should apply that rule only if that's really helpful template @@ -62,12 +62,12 @@ struct evaluator, template -struct evaluator, DiagIndex> > +struct evaluator, DiagIndex> > : public evaluator, DiagIndex> > { typedef Diagonal, DiagIndex> XprType; typedef evaluator, DiagIndex> > Base; - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(Diagonal, DiagIndex>( Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), @@ -108,23 +108,23 @@ struct product_evaluator, ProductTag, LhsShape, RhsSh : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast(this)) Base(m_result); - + // FIXME shall we handle nested_eval here?, // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.) // typedef typename internal::nested_eval::type LhsNested; // typedef typename internal::nested_eval::type RhsNested; // typedef typename internal::remove_all::type LhsNestedCleaned; // typedef typename internal::remove_all::type RhsNestedCleaned; -// +// // const LhsNested lhs(xpr.lhs()); // const RhsNested rhs(xpr.rhs()); -// +// // generic_product_impl::evalTo(m_result, lhs, rhs); generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } - -protected: + +protected: PlainObject m_result; }; @@ -137,7 +137,7 @@ struct Assignment, internal::assign_op::type> { typedef Product SrcXprType; - static EIGEN_STRONG_INLINE + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); @@ -155,7 +155,7 @@ struct Assignment, internal::add_assign_op< typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product SrcXprType; - static EIGEN_STRONG_INLINE + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -170,7 +170,7 @@ struct Assignment, internal::sub_assign_op< typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> { typedef Product SrcXprType; - static EIGEN_STRONG_INLINE + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -190,7 +190,7 @@ struct Assignment, const CwiseNullaryOp,Plain>, const Product > SrcXprType; - static EIGEN_STRONG_INLINE + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); @@ -250,13 +250,13 @@ struct generic_product_impl { dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); } - + template static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); } - + template static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } @@ -298,7 +298,7 @@ struct generic_product_impl { template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; typedef typename Product::Scalar Scalar; - + // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; @@ -310,31 +310,31 @@ struct generic_product_impl dst.const_cast_derived() += m_scale * src; } }; - + template static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); } - + template static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); } - + template static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); } - + template static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); } - + }; @@ -343,7 +343,7 @@ template struct generic_product_impl_base { typedef typename Product::Scalar Scalar; - + template static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } @@ -355,7 +355,7 @@ struct generic_product_impl_base template static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } - + template static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } @@ -385,12 +385,12 @@ struct generic_product_impl }; template -struct generic_product_impl +struct generic_product_impl { typedef typename Product::Scalar Scalar; - + template - static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // Same as: dst.noalias() = lhs.lazyProduct(rhs); // but easier on the compiler side @@ -403,7 +403,7 @@ struct generic_product_impl // dst.noalias() += lhs.lazyProduct(rhs); call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); } - + template static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { @@ -435,8 +435,8 @@ struct generic_product_impl { call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func); } - - + + // template // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) // { dst.noalias() += alpha * lhs.lazyProduct(rhs); } @@ -497,7 +497,7 @@ struct product_evaluator, ProductTag, DenseShape, typedef typename internal::nested_eval::type LhsNested; typedef typename internal::nested_eval::type RhsNested; - + typedef typename internal::remove_all::type LhsNestedCleaned; typedef typename internal::remove_all::type RhsNestedCleaned; @@ -516,7 +516,7 @@ struct product_evaluator, ProductTag, DenseShape, typedef typename find_best_packet::type RhsVecPacketType; enum { - + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost @@ -525,10 +525,10 @@ struct product_evaluator, ProductTag, DenseShape, + (InnerSize - 1) * NumTraits::AddCost, Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - + LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, - + LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, @@ -538,7 +538,7 @@ struct product_evaluator, ProductTag, DenseShape, // Here, we don't care about alignment larger than the usable packet size. LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), - + SameType = is_same::value, CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), @@ -553,7 +553,7 @@ struct product_evaluator, ProductTag, DenseShape, // TODO enable vectorization for mixed types | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), - + LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), @@ -572,7 +572,7 @@ struct product_evaluator, ProductTag, DenseShape, && (LhsFlags & RhsFlags & ActualPacketAccessBit) && (InnerSize % packet_traits::size == 0) }; - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const { return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); @@ -611,7 +611,7 @@ struct product_evaluator, ProductTag, DenseShape, protected: typename internal::add_const_on_value_type::type m_lhs; typename internal::add_const_on_value_type::type m_rhs; - + LhsEtorType m_lhsImpl; RhsEtorType m_rhsImpl; @@ -730,7 +730,7 @@ struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; - + template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { @@ -744,7 +744,7 @@ struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; - + template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { @@ -765,7 +765,7 @@ struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; - + template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { @@ -778,7 +778,7 @@ struct generic_product_impl : generic_product_impl_base > { typedef typename Product::Scalar Scalar; - + template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { @@ -790,7 +790,7 @@ struct generic_product_impl /*************************************************************************** * Diagonal products ***************************************************************************/ - + template struct diagonal_product_evaluator_base : evaluator_base @@ -799,7 +799,7 @@ struct diagonal_product_evaluator_base public: enum { CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, - + MatrixFlags = evaluator::Flags, DiagFlags = evaluator::Flags, _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, @@ -817,14 +817,14 @@ public: || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft) || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight) }; - + diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) : m_diagImpl(diag), m_matImpl(mat) { EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { if(AsScalarProduct) @@ -832,7 +832,7 @@ public: else return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); } - + protected: template EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const @@ -840,7 +840,7 @@ protected: return internal::pmul(m_matImpl.template packet(row, col), internal::pset1(m_diagImpl.coeff(id))); } - + template EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const { @@ -851,7 +851,7 @@ protected: return internal::pmul(m_matImpl.template packet(row, col), m_diagImpl.template packet(id)); } - + evaluator m_diagImpl; evaluator m_matImpl; }; @@ -866,10 +866,10 @@ struct product_evaluator, ProductTag, DiagonalSha using Base::m_matImpl; using Base::coeff; typedef typename Base::Scalar Scalar; - + typedef Product XprType; typedef typename XprType::PlainObject PlainObject; - + enum { StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; @@ -878,12 +878,12 @@ struct product_evaluator, ProductTag, DiagonalSha : Base(xpr.rhs(), xpr.lhs().diagonal()) { } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); } - + #ifndef __CUDACC__ template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const @@ -893,7 +893,7 @@ struct product_evaluator, ProductTag, DiagonalSha return this->template packet_impl(row,col, row, typename internal::conditional::type()); } - + template EIGEN_STRONG_INLINE PacketType packet(Index idx) const { @@ -912,22 +912,22 @@ struct product_evaluator, ProductTag, DenseShape, using Base::m_matImpl; using Base::coeff; typedef typename Base::Scalar Scalar; - + typedef Product XprType; typedef typename XprType::PlainObject PlainObject; - + enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) { } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); } - + #ifndef __CUDACC__ template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const @@ -935,7 +935,7 @@ struct product_evaluator, ProductTag, DenseShape, return this->template packet_impl(row,col, col, typename internal::conditional::type()); } - + template EIGEN_STRONG_INLINE PacketType packet(Index idx) const { @@ -1017,7 +1017,7 @@ template struct generic_product_impl { template - static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { permutation_matrix_product::run(dst, lhs, rhs); } @@ -1027,7 +1027,7 @@ template struct generic_product_impl { template - static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { permutation_matrix_product::run(dst, rhs, lhs); } @@ -1037,7 +1037,7 @@ template struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> { template - static void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) { permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); } @@ -1047,7 +1047,7 @@ template struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> { template - static void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) + static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) { permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); } @@ -1069,7 +1069,7 @@ struct transposition_matrix_product { typedef typename nested_eval::type MatrixType; typedef typename remove_all::type MatrixTypeCleaned; - + template static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) { @@ -1094,7 +1094,7 @@ template struct generic_product_impl { template - static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { transposition_matrix_product::run(dst, lhs, rhs); } @@ -1104,7 +1104,7 @@ template struct generic_product_impl { template - static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { transposition_matrix_product::run(dst, rhs, lhs); } @@ -1115,7 +1115,7 @@ template struct generic_product_impl, Rhs, TranspositionsShape, MatrixShape, ProductTag> { template - static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) { transposition_matrix_product::run(dst, lhs.nestedExpression(), rhs); } @@ -1125,7 +1125,7 @@ template struct generic_product_impl, MatrixShape, TranspositionsShape, ProductTag> { template - static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + static EIGEN_DEVICE_FUNC void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) { transposition_matrix_product::run(dst, rhs.nestedExpression(), lhs); } diff --git a/Eigen/src/Core/Random.h b/Eigen/src/Core/Random.h index 6faf789c7..536a452c6 100644 --- a/Eigen/src/Core/Random.h +++ b/Eigen/src/Core/Random.h @@ -10,7 +10,7 @@ #ifndef EIGEN_RANDOM_H #define EIGEN_RANDOM_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -29,16 +29,16 @@ struct functor_traits > * * Numbers are uniformly spread through their whole definition range for integer types, * and in the [-1:1] range for floating point scalar types. - * + * * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * * \not_reentrant - * + * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used * instead. - * + * * * Example: \include MatrixBase_random_int_int.cpp * Output: \verbinclude MatrixBase_random_int_int.out @@ -46,7 +46,7 @@ struct functor_traits > * This expression has the "evaluate before nesting" flag so that it will be evaluated into * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. - * + * * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators. * * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() @@ -93,7 +93,7 @@ DenseBase::Random(Index size) * * Numbers are uniformly spread through their whole definition range for integer types, * and in the [-1:1] range for floating point scalar types. - * + * * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you * need to use the variants taking size arguments. * @@ -103,7 +103,7 @@ DenseBase::Random(Index size) * This expression has the "evaluate before nesting" flag so that it will be evaluated into * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. - * + * * \not_reentrant * * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index) @@ -119,16 +119,16 @@ DenseBase::Random() * * Numbers are uniformly spread through their whole definition range for integer types, * and in the [-1:1] range for floating point scalar types. - * + * * \not_reentrant - * + * * Example: \include MatrixBase_setRandom.cpp * Output: \verbinclude MatrixBase_setRandom.out * * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) */ template -inline Derived& DenseBase::setRandom() +EIGEN_DEVICE_FUNC inline Derived& DenseBase::setRandom() { return *this = Random(rows(), cols()); } @@ -137,7 +137,7 @@ inline Derived& DenseBase::setRandom() * * Numbers are uniformly spread through their whole definition range for integer types, * and in the [-1:1] range for floating point scalar types. - * + * * \only_for_vectors * \not_reentrant * @@ -160,7 +160,7 @@ PlainObjectBase::setRandom(Index newSize) * and in the [-1:1] range for floating point scalar types. * * \not_reentrant - * + * * \param rows the new number of rows * \param cols the new number of columns * diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 760e9f861..b4817eaff 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -11,7 +11,7 @@ #ifndef EIGEN_REDUX_H #define EIGEN_REDUX_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -60,7 +60,7 @@ public: enum { Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling }; - + #ifdef EIGEN_DEBUG_ASSIGN static void debug() { @@ -128,7 +128,7 @@ template struct redux_novec_unroller { typedef typename Derived::Scalar Scalar; - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } }; @@ -215,7 +215,7 @@ struct redux_impl static Scalar run(const Derived &mat, const Func& func) { const Index size = mat.size(); - + const Index packetSize = redux_traits::PacketSize; const int packetAlignment = unpacket_traits::alignment; enum { @@ -336,12 +336,12 @@ class redux_evaluator public: typedef _XprType XprType; EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} - + typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - + enum { MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, @@ -353,7 +353,7 @@ public: CoeffReadCost = evaluator::CoeffReadCost, Alignment = evaluator::Alignment }; - + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } @@ -375,17 +375,17 @@ public: template PacketType packet(Index index) const { return m_evaluator.template packet(index); } - + EIGEN_DEVICE_FUNC CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } - + template PacketType packetByOuterInner(Index outer, Index inner) const { return m_evaluator.template packet(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } - + const XprType & nestedExpression() const { return m_xpr; } - + protected: internal::evaluator m_evaluator; const XprType &m_xpr; @@ -407,14 +407,14 @@ protected: */ template template -EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); typedef typename internal::redux_evaluator ThisEvaluator; ThisEvaluator thisEval(derived()); - + return internal::redux_impl::run(thisEval, func); } @@ -422,7 +422,7 @@ DenseBase::redux(const Func& func) const * \warning the result is undefined if \c *this contains NaN. */ template -EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { return derived().redux(Eigen::internal::scalar_min_op()); @@ -432,7 +432,7 @@ DenseBase::minCoeff() const * \warning the result is undefined if \c *this contains NaN. */ template -EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { return derived().redux(Eigen::internal::scalar_max_op()); @@ -445,7 +445,7 @@ DenseBase::maxCoeff() const * \sa trace(), prod(), mean() */ template -EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -458,7 +458,7 @@ DenseBase::sum() const * \sa trace(), prod(), sum() */ template -EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::mean() const { #ifdef __INTEL_COMPILER @@ -479,7 +479,7 @@ DenseBase::mean() const * \sa sum(), mean(), trace() */ template -EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::prod() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -494,7 +494,7 @@ DenseBase::prod() const * \sa diagonal(), sum() */ template -EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar MatrixBase::trace() const { return derived().diagonal().sum(); diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index 9960ef884..4111da420 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -10,7 +10,7 @@ #ifndef EIGEN_REPLICATE_H #define EIGEN_REPLICATE_H -namespace Eigen { +namespace Eigen { namespace internal { template @@ -35,7 +35,7 @@ struct traits > IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - + // FIXME enable DirectAccess with negative strides? Flags = IsRowMajor ? RowMajorBit : 0 }; @@ -95,8 +95,8 @@ template class Replicate EIGEN_DEVICE_FUNC const _MatrixTypeNested& nestedExpression() const - { - return m_matrix; + { + return m_matrix; } protected: @@ -115,7 +115,7 @@ template class Replicate */ template template -const Replicate +EIGEN_DEVICE_FUNC const Replicate DenseBase::replicate() const { return Replicate(derived()); @@ -130,7 +130,7 @@ DenseBase::replicate() const * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate */ template -const typename VectorwiseOp::ReplicateReturnType +EIGEN_DEVICE_FUNC const typename VectorwiseOp::ReplicateReturnType VectorwiseOp::replicate(Index factor) const { return typename VectorwiseOp::ReplicateReturnType diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h index c44b7673b..090053bbb 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -79,7 +79,7 @@ template class ReturnByValue template template -Derived& DenseBase::operator=(const ReturnByValue& other) +EIGEN_DEVICE_FUNC Derived& DenseBase::operator=(const ReturnByValue& other) { other.evalTo(derived()); return derived(); @@ -90,7 +90,7 @@ namespace internal { // Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that // when a ReturnByValue expression is assigned, the evaluator is not constructed. // TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world - + template struct evaluator > : public evaluator::ReturnType> @@ -98,7 +98,7 @@ struct evaluator > typedef ReturnByValue XprType; typedef typename internal::traits::ReturnType PlainObject; typedef evaluator Base; - + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index 0640cda2a..8b3f24020 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -12,7 +12,7 @@ #ifndef EIGEN_REVERSE_H #define EIGEN_REVERSE_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -44,7 +44,7 @@ template struct reverse_packet_cond static inline PacketType run(const PacketType& x) { return x; } }; -} // end namespace internal +} // end namespace internal /** \class Reverse * \ingroup Core_Module @@ -98,7 +98,7 @@ template class Reverse } EIGEN_DEVICE_FUNC const typename internal::remove_all::type& - nestedExpression() const + nestedExpression() const { return m_matrix; } @@ -114,7 +114,7 @@ template class Reverse * */ template -inline typename DenseBase::ReverseReturnType +EIGEN_DEVICE_FUNC inline typename DenseBase::ReverseReturnType DenseBase::reverse() { return ReverseReturnType(derived()); @@ -136,7 +136,7 @@ DenseBase::reverse() * * \sa VectorwiseOp::reverseInPlace(), reverse() */ template -inline void DenseBase::reverseInPlace() +EIGEN_DEVICE_FUNC inline void DenseBase::reverseInPlace() { if(cols()>rows()) { @@ -161,7 +161,7 @@ inline void DenseBase::reverseInPlace() } namespace internal { - + template struct vectorwise_reverse_inplace_impl; @@ -201,7 +201,7 @@ struct vectorwise_reverse_inplace_impl * * \sa DenseBase::reverseInPlace(), reverse() */ template -void VectorwiseOp::reverseInPlace() +EIGEN_DEVICE_FUNC void VectorwiseOp::reverseInPlace() { internal::vectorwise_reverse_inplace_impl::run(_expression().const_cast_derived()); } diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index b2e51f37a..24f50b079 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -10,7 +10,7 @@ #ifndef EIGEN_SELFADJOINTMATRIX_H #define EIGEN_SELFADJOINTMATRIX_H -namespace Eigen { +namespace Eigen { /** \class SelfAdjointView * \ingroup Core_Module @@ -58,7 +58,7 @@ template class SelfAdjointView typedef MatrixTypeNestedCleaned NestedExpression; /** \brief The type of coefficients in this matrix */ - typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef typename internal::remove_all::type MatrixConjugateReturnType; @@ -131,7 +131,7 @@ template class SelfAdjointView { return Product(lhs.derived(),rhs); } - + friend EIGEN_DEVICE_FUNC const SelfAdjointView operator*(const Scalar& s, const SelfAdjointView& mat) @@ -287,17 +287,17 @@ protected: using Base::m_src; using Base::m_functor; public: - + typedef typename Base::DstEvaluatorType DstEvaluatorType; typedef typename Base::SrcEvaluatorType SrcEvaluatorType; typedef typename Base::Scalar Scalar; typedef typename Base::AssignmentTraits AssignmentTraits; - - + + EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) : Base(dst, src, func, dstExpr) {} - + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { eigen_internal_assert(row!=col); @@ -305,12 +305,12 @@ public: m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp); m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp)); } - + EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) { Base::assignCoeff(id,id); } - + EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index) { eigen_internal_assert(false && "should never be called"); } }; @@ -324,7 +324,7 @@ public: /** This is the const version of MatrixBase::selfadjointView() */ template template -typename MatrixBase::template ConstSelfAdjointViewReturnType::Type +EIGEN_DEVICE_FUNC typename MatrixBase::template ConstSelfAdjointViewReturnType::Type MatrixBase::selfadjointView() const { return typename ConstSelfAdjointViewReturnType::Type(derived()); @@ -341,7 +341,7 @@ MatrixBase::selfadjointView() const */ template template -typename MatrixBase::template SelfAdjointViewReturnType::Type +EIGEN_DEVICE_FUNC typename MatrixBase::template SelfAdjointViewReturnType::Type MatrixBase::selfadjointView() { return typename SelfAdjointViewReturnType::Type(derived()); diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index a8daea511..169214bb2 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -13,7 +13,7 @@ namespace Eigen { template class SolveImpl; - + /** \class Solve * \ingroup Core_Module * @@ -64,11 +64,11 @@ class Solve : public SolveImpl::PlainObject PlainObject; typedef typename internal::traits::StorageIndex StorageIndex; - + Solve(const Decomposition &dec, const RhsType &rhs) : m_dec(dec), m_rhs(rhs) {} - + EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); } EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); } @@ -87,14 +87,14 @@ class SolveImpl : public MatrixBase > { typedef Solve Derived; - + public: - + typedef MatrixBase > Base; EIGEN_DENSE_PUBLIC_INTERFACE(Derived) private: - + Scalar coeff(Index row, Index col) const; Scalar coeff(Index i) const; }; @@ -119,15 +119,15 @@ struct evaluator > typedef evaluator Base; enum { Flags = Base::Flags | EvalBeforeNestingBit }; - + EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve) : m_result(solve.rows(), solve.cols()) { ::new (static_cast(this)) Base(m_result); solve.dec()._solve_impl(solve.rhs(), m_result); } - -protected: + +protected: PlainObject m_result; }; @@ -137,7 +137,7 @@ template, internal::assign_op, Dense2Dense> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); Index dstCols = src.cols(); @@ -153,7 +153,7 @@ template,RhsType>, internal::assign_op, Dense2Dense> { typedef Solve,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); Index dstCols = src.cols(); @@ -170,13 +170,13 @@ struct Assignment, Dense2Dense> { typedef Solve, const Transpose >,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); Index dstCols = src.cols(); if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) dst.resize(dstRows, dstCols); - + src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } }; diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index fd0acb1a5..ced5c9fc0 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -10,7 +10,7 @@ #ifndef EIGEN_SOLVETRIANGULAR_H #define EIGEN_SOLVETRIANGULAR_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -64,7 +64,7 @@ struct triangular_solver_selector ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(), (useRhsDirectly ? rhs.data() : 0)); - + if(!useRhsDirectly) MappedRhs(actualRhs,rhs.size()) = rhs; @@ -148,7 +148,7 @@ struct triangular_solver_selector { { Transpose trLhs(lhs); Transpose trRhs(rhs); - + triangular_solver_unroller,Transpose, ((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag), 0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs); @@ -164,7 +164,7 @@ struct triangular_solver_selector { #ifndef EIGEN_PARSED_BY_DOXYGEN template template -void TriangularViewImpl::solveInPlace(const MatrixBase& _other) const +EIGEN_DEVICE_FUNC void TriangularViewImpl::solveInPlace(const MatrixBase& _other) const { OtherDerived& other = _other.const_cast_derived(); eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) ); @@ -187,7 +187,7 @@ void TriangularViewImpl::solveInPlace(const MatrixBase template -const internal::triangular_solve_retval,Other> +EIGEN_DEVICE_FUNC const internal::triangular_solve_retval,Other> TriangularViewImpl::solve(const MatrixBase& other) const { return internal::triangular_solve_retval(derived(), other.derived()); diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 960dc4510..cec1be7c9 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -11,7 +11,7 @@ #ifndef EIGEN_TRANSPOSE_H #define EIGEN_TRANSPOSE_H -namespace Eigen { +namespace Eigen { namespace internal { template @@ -170,7 +170,7 @@ template class TransposeImpl * * \sa transposeInPlace(), adjoint() */ template -inline Transpose +EIGEN_DEVICE_FUNC inline Transpose DenseBase::transpose() { return TransposeReturnType(derived()); @@ -182,7 +182,7 @@ DenseBase::transpose() * * \sa transposeInPlace(), adjoint() */ template -inline typename DenseBase::ConstTransposeReturnType +EIGEN_DEVICE_FUNC inline typename DenseBase::ConstTransposeReturnType DenseBase::transpose() const { return ConstTransposeReturnType(derived()); @@ -208,7 +208,7 @@ DenseBase::transpose() const * * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */ template -inline const typename MatrixBase::AdjointReturnType +EIGEN_DEVICE_FUNC inline const typename MatrixBase::AdjointReturnType MatrixBase::adjoint() const { return AdjointReturnType(this->transpose()); @@ -278,12 +278,12 @@ struct inplace_transpose_selector { // non squ * Notice however that this method is only useful if you want to replace a matrix by its own transpose. * If you just need the transpose of a matrix, use transpose(). * - * \note if the matrix is not square, then \c *this must be a resizable matrix. + * \note if the matrix is not square, then \c *this must be a resizable matrix. * This excludes (non-square) fixed-size matrices, block-expressions and maps. * * \sa transpose(), adjoint(), adjointInPlace() */ template -inline void DenseBase::transposeInPlace() +EIGEN_DEVICE_FUNC inline void DenseBase::transposeInPlace() { eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic)) && "transposeInPlace() called on a non-square non-resizable matrix"); @@ -314,7 +314,7 @@ inline void DenseBase::transposeInPlace() * * \sa transpose(), adjoint(), transposeInPlace() */ template -inline void MatrixBase::adjointInPlace() +EIGEN_DEVICE_FUNC inline void MatrixBase::adjointInPlace() { derived() = adjoint().eval(); } diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 9abb7e31a..3161482e8 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -11,12 +11,12 @@ #ifndef EIGEN_TRIANGULARMATRIX_H #define EIGEN_TRIANGULARMATRIX_H -namespace Eigen { +namespace Eigen { namespace internal { - + template struct triangular_solve_retval; - + } /** \class TriangularBase @@ -34,16 +34,16 @@ template class TriangularBase : public EigenBase ColsAtCompileTime = internal::traits::ColsAtCompileTime, MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, - + SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, internal::traits::ColsAtCompileTime>::ret), /**< This is equal to the number of coefficients, i.e. the number of * rows times the number of columns, or to \a Dynamic if this is not * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ - + MaxSizeAtCompileTime = (internal::size_at_compile_time::MaxRowsAtCompileTime, internal::traits::MaxColsAtCompileTime>::ret) - + }; typedef typename internal::traits::Scalar Scalar; typedef typename internal::traits::StorageKind StorageKind; @@ -63,7 +63,7 @@ template class TriangularBase : public EigenBase inline Index outerStride() const { return derived().outerStride(); } EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().innerStride(); } - + // dummy resize function void resize(Index rows, Index cols) { @@ -155,7 +155,7 @@ template class TriangularBase : public EigenBase * \param MatrixType the type of the object in which we are taking the triangular part * \param Mode the kind of triangular matrix expression to construct. Can be #Upper, * #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower. - * This is in fact a bit field; it must have either #Upper or #Lower, + * This is in fact a bit field; it must have either #Upper or #Lower, * and additionally it may have #UnitDiag or #ZeroDiag or neither. * * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular @@ -197,7 +197,7 @@ template class TriangularView typedef typename internal::traits::MatrixTypeNestedNonRef MatrixTypeNestedNonRef; typedef typename internal::remove_all::type MatrixConjugateReturnType; - + public: typedef typename internal::traits::StorageKind StorageKind; @@ -216,7 +216,7 @@ template class TriangularView EIGEN_DEVICE_FUNC explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix) {} - + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TriangularView) /** \copydoc EigenBase::rows() */ @@ -233,7 +233,7 @@ template class TriangularView /** \returns a reference to the nested expression */ EIGEN_DEVICE_FUNC NestedExpression& nestedExpression() { return m_matrix; } - + typedef TriangularView ConjugateReturnType; /** \sa MatrixBase::conjugate() const */ EIGEN_DEVICE_FUNC @@ -255,7 +255,7 @@ template class TriangularView typename MatrixType::TransposeReturnType tmp(m_matrix); return TransposeReturnType(tmp); } - + typedef TriangularView ConstTransposeReturnType; /** \sa MatrixBase::transpose() const */ EIGEN_DEVICE_FUNC @@ -266,10 +266,10 @@ template class TriangularView template EIGEN_DEVICE_FUNC - inline const Solve + inline const Solve solve(const MatrixBase& other) const { return Solve(*this, other.derived()); } - + // workaround MSVC ICE #if EIGEN_COMP_MSVC template @@ -313,7 +313,7 @@ template class TriangularView else return m_matrix.diagonal().prod(); } - + protected: MatrixTypeNested m_matrix; @@ -375,7 +375,7 @@ template class TriangularViewImpl<_Mat internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); return derived(); } - + /** \sa MatrixBase::operator*=() */ EIGEN_DEVICE_FUNC TriangularViewType& operator*=(const typename internal::traits::Scalar& other) { return *this = derived().nestedExpression() * other; } @@ -556,7 +556,7 @@ template class TriangularViewImpl<_Mat // FIXME should we keep that possibility template template -inline TriangularView& +EIGEN_DEVICE_FUNC inline TriangularView& TriangularViewImpl::operator=(const MatrixBase& other) { internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); @@ -566,7 +566,7 @@ TriangularViewImpl::operator=(const MatrixBase template -void TriangularViewImpl::lazyAssign(const MatrixBase& other) +EIGEN_DEVICE_FUNC void TriangularViewImpl::lazyAssign(const MatrixBase& other) { internal::call_assignment_no_alias(derived(), other.template triangularView()); } @@ -575,7 +575,7 @@ void TriangularViewImpl::lazyAssign(const MatrixBase template -inline TriangularView& +EIGEN_DEVICE_FUNC inline TriangularView& TriangularViewImpl::operator=(const TriangularBase& other) { eigen_assert(Mode == int(OtherDerived::Mode)); @@ -585,7 +585,7 @@ TriangularViewImpl::operator=(const TriangularBase template -void TriangularViewImpl::lazyAssign(const TriangularBase& other) +EIGEN_DEVICE_FUNC void TriangularViewImpl::lazyAssign(const TriangularBase& other) { eigen_assert(Mode == int(OtherDerived::Mode)); internal::call_assignment_no_alias(derived(), other.derived()); @@ -600,7 +600,7 @@ void TriangularViewImpl::lazyAssign(const TriangularBas * If the matrix is triangular, the opposite part is set to zero. */ template template -void TriangularBase::evalTo(MatrixBase &other) const +EIGEN_DEVICE_FUNC void TriangularBase::evalTo(MatrixBase &other) const { evalToLazy(other.derived()); } @@ -626,7 +626,7 @@ void TriangularBase::evalTo(MatrixBase &other) const */ template template -typename MatrixBase::template TriangularViewReturnType::Type +EIGEN_DEVICE_FUNC typename MatrixBase::template TriangularViewReturnType::Type MatrixBase::triangularView() { return typename TriangularViewReturnType::Type(derived()); @@ -635,7 +635,7 @@ MatrixBase::triangularView() /** This is the const version of MatrixBase::triangularView() */ template template -typename MatrixBase::template ConstTriangularViewReturnType::Type +EIGEN_DEVICE_FUNC typename MatrixBase::template ConstTriangularViewReturnType::Type MatrixBase::triangularView() const { return typename ConstTriangularViewReturnType::Type(derived()); @@ -700,7 +700,7 @@ bool MatrixBase::isLowerTriangular(const RealScalar& prec) const namespace internal { - + // TODO currently a triangular expression has the form TriangularView<.,.> // in the future triangular-ness should be defined by the expression traits // such that Transpose > is valid. (currently TriangularBase::transpose() is overloaded to make it work) @@ -728,7 +728,7 @@ struct Dense2Triangular {}; template struct triangular_assignment_loop; - + /** \internal Specialization of the dense assignment kernel for triangular matrices. * The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions. * \tparam UpLo must be either Lower or Upper @@ -745,17 +745,17 @@ protected: using Base::m_src; using Base::m_functor; public: - + typedef typename Base::DstEvaluatorType DstEvaluatorType; typedef typename Base::SrcEvaluatorType SrcEvaluatorType; typedef typename Base::Scalar Scalar; typedef typename Base::AssignmentTraits AssignmentTraits; - - + + EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) : Base(dst, src, func, dstExpr) {} - + #ifdef EIGEN_INTERNAL_DEBUGGING EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { @@ -765,16 +765,16 @@ public: #else using Base::assignCoeff; #endif - + EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) { if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1)); else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0)); else if(Mode==0) Base::assignCoeff(id,id); } - + EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col) - { + { eigen_internal_assert(row!=col); if(SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0)); @@ -795,17 +795,17 @@ void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, con if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) dst.resize(dstRows, dstCols); DstEvaluatorType dstEvaluator(dst); - + typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite, DstEvaluatorType,SrcEvaluatorType,Functor> Kernel; Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); - + enum { unroll = DstXprType::SizeAtCompileTime != Dynamic && SrcEvaluatorType::CoeffReadCost < HugeCost && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT }; - + triangular_assignment_loop::run(kernel); } @@ -827,8 +827,8 @@ struct Assignment EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode)); - - call_triangular_assignment_loop(dst, src, func); + + call_triangular_assignment_loop(dst, src, func); } }; @@ -837,7 +837,7 @@ struct Assignment { EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { - call_triangular_assignment_loop(dst, src, func); + call_triangular_assignment_loop(dst, src, func); } }; @@ -846,7 +846,7 @@ struct Assignment { EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { - call_triangular_assignment_loop(dst, src, func); + call_triangular_assignment_loop(dst, src, func); } }; @@ -857,19 +857,19 @@ struct triangular_assignment_loop // FIXME: this is not very clean, perhaps this information should be provided by the kernel? typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; - + enum { col = (UnrollCount-1) / DstXprType::RowsAtCompileTime, row = (UnrollCount-1) % DstXprType::RowsAtCompileTime }; - + typedef typename Kernel::Scalar Scalar; EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { triangular_assignment_loop::run(kernel); - + if(row==col) kernel.assignDiagonalCoeff(row); else if( ((Mode&Lower) && row>col) || ((Mode&Upper) && row } else i = maxi; - + if(i * If the matrix is triangular, the opposite part is set to zero. */ template template -void TriangularBase::evalToLazy(MatrixBase &other) const +EIGEN_DEVICE_FUNC void TriangularBase::evalToLazy(MatrixBase &other) const { other.derived().resize(this->rows(), this->cols()); internal::call_triangular_assignment_loop(other.derived(), derived().nestedExpression()); } namespace internal { - + // Triangular = Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> struct Assignment, internal::assign_op::Scalar>, Dense2Triangular> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); Index dstCols = src.cols(); @@ -961,7 +961,7 @@ template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> struct Assignment, internal::add_assign_op::Scalar>, Dense2Triangular> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { dst._assignProduct(src, 1, 1); } @@ -972,7 +972,7 @@ template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> struct Assignment, internal::sub_assign_op::Scalar>, Dense2Triangular> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { dst._assignProduct(src, -1, 1); } diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 4fe267e9f..893bc796f 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -670,7 +670,7 @@ template class VectorwiseOp * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template -inline typename DenseBase::ColwiseReturnType +EIGEN_DEVICE_FUNC inline typename DenseBase::ColwiseReturnType DenseBase::colwise() { return ColwiseReturnType(derived()); @@ -684,7 +684,7 @@ DenseBase::colwise() * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template -inline typename DenseBase::RowwiseReturnType +EIGEN_DEVICE_FUNC inline typename DenseBase::RowwiseReturnType DenseBase::rowwise() { return RowwiseReturnType(derived()); diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index a567002ab..084533499 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -167,10 +167,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu(const d return make_double2(from[0], from[1]); } -template<> EIGEN_STRONG_INLINE float4 ploaddup(const float* from) { +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup(const float* from) { return make_float4(from[0], from[0], from[1], from[1]); } -template<> EIGEN_STRONG_INLINE double2 ploaddup(const double* from) { +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup(const double* from) { return make_double2(from[0], from[0]); } diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 27f8f5044..daf5a24f2 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -10,7 +10,7 @@ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H -namespace Eigen { +namespace Eigen { template struct selfadjoint_rank1_update; @@ -27,7 +27,7 @@ namespace internal { // forward declarations (defined at the end of this file) template struct tribb_kernel; - + /* Optimized matrix-matrix product evaluating only one triangular half */ template static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) { typedef typename MatrixType::Scalar Scalar; - + typedef typename internal::remove_all::type Lhs; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; typedef typename internal::remove_all::type _ActualLhs; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - + typedef typename internal::remove_all::type Rhs; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; @@ -230,18 +230,18 @@ struct general_product_to_triangular_selector UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1 }; - + internal::gemv_static_vector_if static_lhs; ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(), (UseLhsDirectly ? const_cast(actualLhs.data()) : static_lhs.data())); if(!UseLhsDirectly) Map(actualLhsPtr, actualLhs.size()) = actualLhs; - + internal::gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(), (UseRhsDirectly ? const_cast(actualRhs.data()) : static_rhs.data())); if(!UseRhsDirectly) Map(actualRhsPtr, actualRhs.size()) = actualRhs; - - + + selfadjoint_rank1_update::IsComplex, RhsBlasTraits::NeedToConjugate && NumTraits::IsComplex> @@ -259,7 +259,7 @@ struct general_product_to_triangular_selector typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; typedef typename internal::remove_all::type _ActualLhs; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - + typedef typename internal::remove_all::type Rhs; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; @@ -302,13 +302,13 @@ struct general_product_to_triangular_selector template template -TriangularView<_MatrixType,_Mode>& TriangularViewImpl<_MatrixType,_Mode,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) +EIGEN_DEVICE_FUNC TriangularView<_MatrixType,_Mode>& TriangularViewImpl<_MatrixType,_Mode,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { EIGEN_STATIC_ASSERT((_Mode&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); - + general_product_to_triangular_selector<_MatrixType, ProductType, _Mode, internal::traits::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); - + return derived(); } diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index ef12c98f6..8fd3b5e2e 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -16,7 +16,7 @@ * It corresponds to the level 3 SYRK and level 2 SYR Blas routines. **********************************************************************/ -namespace Eigen { +namespace Eigen { template @@ -68,10 +68,10 @@ struct selfadjoint_product_selector ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(), (UseOtherDirectly ? const_cast(actualOther.data()) : static_other.data())); - + if(!UseOtherDirectly) Map(actualOtherPtr, actualOther.size()) = actualOther; - + selfadjoint_rank1_update::IsComplex, (!OtherBlasTraits::NeedToConjugate) && NumTraits::IsComplex> @@ -120,7 +120,7 @@ struct selfadjoint_product_selector template template -SelfAdjointView& SelfAdjointView +EIGEN_DEVICE_FUNC SelfAdjointView& SelfAdjointView ::rankUpdate(const MatrixBase& u, const Scalar& alpha) { selfadjoint_product_selector::run(_expression().const_cast_derived(), u.derived(), alpha); diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h index 2ae364111..99a31434c 100644 --- a/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -10,7 +10,7 @@ #ifndef EIGEN_SELFADJOINTRANK2UPTADE_H #define EIGEN_SELFADJOINTRANK2UPTADE_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -57,7 +57,7 @@ template struct conj_expr_if template template -SelfAdjointView& SelfAdjointView +EIGEN_DEVICE_FUNC SelfAdjointView& SelfAdjointView ::rankUpdate(const MatrixBase& u, const MatrixBase& v, const Scalar& alpha) { typedef internal::blas_traits UBlasTraits; diff --git a/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h b/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h index e4e426071..7ba4c42e0 100644 --- a/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +++ b/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h @@ -11,7 +11,7 @@ #ifndef EIGEN_MATRIXBASEEIGENVALUES_H #define EIGEN_MATRIXBASEEIGENVALUES_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -42,13 +42,13 @@ struct eigenvalues_selector } // end namespace internal -/** \brief Computes the eigenvalues of a matrix +/** \brief Computes the eigenvalues of a matrix * \returns Column vector containing the eigenvalues. * * \eigenvalues_module * This function computes the eigenvalues with the help of the EigenSolver * class (for real matrices) or the ComplexEigenSolver class (for complex - * matrices). + * matrices). * * The eigenvalues are repeated according to their algebraic multiplicity, * so there are as many eigenvalues as rows in the matrix. @@ -83,8 +83,8 @@ MatrixBase::eigenvalues() const * * \sa SelfAdjointEigenSolver::eigenvalues(), MatrixBase::eigenvalues() */ -template -inline typename SelfAdjointView::EigenvaluesReturnType +template +EIGEN_DEVICE_FUNC inline typename SelfAdjointView::EigenvaluesReturnType SelfAdjointView::eigenvalues() const { PlainObject thisAsMatrix(*this); @@ -147,7 +147,7 @@ MatrixBase::operatorNorm() const * \sa eigenvalues(), MatrixBase::operatorNorm() */ template -inline typename SelfAdjointView::RealScalar +EIGEN_DEVICE_FUNC inline typename SelfAdjointView::RealScalar SelfAdjointView::operatorNorm() const { return eigenvalues().cwiseAbs().maxCoeff(); diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 03b6af706..896a0c60f 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -53,7 +53,7 @@ template struct traits > * Output: \verbinclude class_FullPivLU.out * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. - * + * * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() */ template class FullPivLU @@ -744,7 +744,7 @@ struct image_retval > #ifndef EIGEN_PARSED_BY_DOXYGEN template template -void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +EIGEN_DEVICE_FUNC void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const { /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. * So we proceed as follows: @@ -792,7 +792,7 @@ void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const template template -void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +EIGEN_DEVICE_FUNC void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const { /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}, * and since permutations are real and unitary, we can write this @@ -864,7 +864,7 @@ struct Assignment >, internal::assign_ { typedef FullPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h index f49f23360..10f0340f3 100644 --- a/Eigen/src/LU/InverseImpl.h +++ b/Eigen/src/LU/InverseImpl.h @@ -11,7 +11,7 @@ #ifndef EIGEN_INVERSE_IMPL_H #define EIGEN_INVERSE_IMPL_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -72,7 +72,7 @@ struct compute_inverse_and_det_with_check ****************************/ template -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline void compute_inverse_size2_helper( const MatrixType& matrix, const typename ResultType::Scalar& invdet, ResultType& result) @@ -122,7 +122,7 @@ struct compute_inverse_and_det_with_check ****************************/ template -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m) { enum { @@ -200,7 +200,7 @@ struct compute_inverse_and_det_with_check ****************************/ template -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline const typename Derived::Scalar general_det3_helper (const MatrixBase& matrix, int i1, int i2, int i3, int j1, int j2, int j3) { @@ -209,7 +209,7 @@ inline const typename Derived::Scalar general_det3_helper } template -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix) { enum { @@ -290,13 +290,13 @@ template struct Assignment, internal::assign_op, Dense2Dense> { typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); Index dstCols = src.cols(); if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) dst.resize(dstRows, dstCols); - + const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); EIGEN_ONLY_USED_FOR_DEBUG(Size); eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst))) @@ -304,14 +304,14 @@ struct Assignment, internal::assign_op::type ActualXprType; typedef typename internal::remove_all::type ActualXprTypeCleanded; - + ActualXprType actual_xpr(src.nestedExpression()); - + compute_inverse::run(actual_xpr, dst); } }; - + } // end namespace internal /** \lu_module diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 6b10f39fa..47518d24e 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -69,7 +69,7 @@ struct enable_if_ref,Derived> { * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP(). * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. - * + * * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU */ template class PartialPivLU @@ -572,7 +572,7 @@ struct Assignment >, internal::assi { typedef PartialPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index a7b47d55d..69624cc89 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -42,7 +42,7 @@ template struct traits > * numerical stability. It is slower than HouseholderQR, and faster than FullPivHouseholderQR. * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. - * + * * \sa MatrixBase::colPivHouseholderQr() */ template class ColPivHouseholderQR @@ -582,7 +582,7 @@ void ColPivHouseholderQR::computeInPlace() #ifndef EIGEN_PARSED_BY_DOXYGEN template template -void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +EIGEN_DEVICE_FUNC void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const { eigen_assert(rhs.rows() == rows()); @@ -618,7 +618,7 @@ struct Assignment >, interna { typedef ColPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 34c637b70..3403ab28e 100644 --- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -41,7 +41,7 @@ struct traits > * size rank-by-rank. \b A may be rank deficient. * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. - * + * * \sa MatrixBase::completeOrthogonalDecomposition() */ template @@ -489,7 +489,7 @@ void CompleteOrthogonalDecomposition::applyZAdjointOnTheLeftInPlace( #ifndef EIGEN_PARSED_BY_DOXYGEN template template -void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( +EIGEN_DEVICE_FUNC void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( const RhsType& rhs, DstType& dst) const { eigen_assert(rhs.rows() == this->rows()); @@ -532,7 +532,7 @@ struct Assignment CodType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows())); } diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index e489bddc2..ff6bdde85 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -11,7 +11,7 @@ #ifndef EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H #define EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -40,18 +40,18 @@ struct traits > * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition * * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R - * such that + * such that * \f[ * \mathbf{P} \, \mathbf{A} \, \mathbf{P}' = \mathbf{Q} \, \mathbf{R} * \f] - * by using Householder transformations. Here, \b P and \b P' are permutation matrices, \b Q a unitary matrix + * by using Householder transformations. Here, \b P and \b P' are permutation matrices, \b Q a unitary matrix * and \b R an upper triangular matrix. * * This decomposition performs a very prudent full pivoting in order to be rank-revealing and achieve optimal * numerical stability. The trade-off is that it is slower than HouseholderQR and ColPivHouseholderQR. * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. - * + * * \sa MatrixBase::fullPivHouseholderQr() */ template class FullPivHouseholderQR @@ -114,12 +114,12 @@ template class FullPivHouseholderQR * * This constructor computes the QR factorization of the matrix \a matrix by calling * the method compute(). It is a short cut for: - * + * * \code * FullPivHouseholderQR qr(matrix.rows(), matrix.cols()); * qr.compute(matrix); * \endcode - * + * * \sa compute() */ template @@ -317,9 +317,9 @@ template class FullPivHouseholderQR inline Index rows() const { return m_qr.rows(); } inline Index cols() const { return m_qr.cols(); } - + /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q. - * + * * For advanced uses only. */ const HCoeffsType& hCoeffs() const { return m_hCoeffs; } @@ -392,7 +392,7 @@ template class FullPivHouseholderQR * diagonal coefficient of U. */ RealScalar maxPivot() const { return m_maxpivot; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC @@ -400,14 +400,14 @@ template class FullPivHouseholderQR #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; IntDiagSizeVectorType m_rows_transpositions; @@ -463,7 +463,7 @@ void FullPivHouseholderQR::computeInPlace() Index cols = m_qr.cols(); Index size = (std::min)(rows,cols); - + m_hCoeffs.resize(size); m_temp.resize(cols); @@ -539,7 +539,7 @@ void FullPivHouseholderQR::computeInPlace() #ifndef EIGEN_PARSED_BY_DOXYGEN template template -void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +EIGEN_DEVICE_FUNC void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const { eigen_assert(rhs.rows() == rows()); const Index l_rank = rank(); @@ -574,14 +574,14 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType #endif namespace internal { - + template struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef FullPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h index 3513d995c..0c34053bb 100644 --- a/Eigen/src/QR/HouseholderQR.h +++ b/Eigen/src/QR/HouseholderQR.h @@ -12,7 +12,7 @@ #ifndef EIGEN_QR_H #define EIGEN_QR_H -namespace Eigen { +namespace Eigen { /** \ingroup QR_Module * @@ -24,7 +24,7 @@ namespace Eigen { * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition * * This class performs a QR decomposition of a matrix \b A into matrices \b Q and \b R - * such that + * such that * \f[ * \mathbf{A} = \mathbf{Q} \, \mathbf{R} * \f] @@ -85,12 +85,12 @@ template class HouseholderQR * * This constructor computes the QR factorization of the matrix \a matrix by calling * the method compute(). It is a short cut for: - * + * * \code * HouseholderQR qr(matrix.rows(), matrix.cols()); * qr.compute(matrix); * \endcode - * + * * \sa compute() */ template @@ -204,13 +204,13 @@ template class HouseholderQR inline Index rows() const { return m_qr.rows(); } inline Index cols() const { return m_qr.cols(); } - + /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q. - * + * * For advanced uses only. */ const HCoeffsType& hCoeffs() const { return m_hCoeffs; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC @@ -218,14 +218,14 @@ template class HouseholderQR #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; RowVectorType m_temp; @@ -347,7 +347,7 @@ struct householder_qr_inplace_blocked #ifndef EIGEN_PARSED_BY_DOXYGEN template template -void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const +EIGEN_DEVICE_FUNC void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const { const Index rank = (std::min)(rows(), cols()); eigen_assert(rhs.rows() == rows()); @@ -379,7 +379,7 @@ template void HouseholderQR::computeInPlace() { check_template_parameters(); - + Index rows = m_qr.rows(); Index cols = m_qr.cols(); Index size = (std::min)(rows,cols); diff --git a/Eigen/src/SVD/SVDBase.h b/Eigen/src/SVD/SVDBase.h index 53da28488..8c9b23e16 100644 --- a/Eigen/src/SVD/SVDBase.h +++ b/Eigen/src/SVD/SVDBase.h @@ -34,12 +34,12 @@ namespace Eigen { * * Singular values are always sorted in decreasing order. * - * + * * You can ask for only \em thin \a U or \a V to be computed, meaning the following. In case of a rectangular n-by-p matrix, letting \a m be the * smaller value among \a n and \a p, there are only \a m singular vectors; the remaining columns of \a U and \a V do not correspond to actual * singular vectors. Asking for \em thin \a U or \a V means asking for only their \a m first columns to be formed. So \a U is then a n-by-m matrix, * and \a V is then a p-by-m matrix. Notice that thin \a U and \a V are all you need for (least squares) solving. - * + * * If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to * terminate in finite (and reasonable) time. * \sa class BDCSVD, class JacobiSVD @@ -67,7 +67,7 @@ public: typedef Matrix MatrixUType; typedef Matrix MatrixVType; typedef typename internal::plain_diag_type::type SingularValuesType; - + Derived& derived() { return *static_cast(this); } const Derived& derived() const { return *static_cast(this); } @@ -120,7 +120,7 @@ public: eigen_assert(m_isInitialized && "SVD is not initialized."); return m_nonzeroSingularValues; } - + /** \returns the rank of the matrix of which \c *this is the SVD. * * \note This method has to determine which singular values should be considered nonzero. @@ -137,7 +137,7 @@ public: while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i; return i+1; } - + /** Allows to prescribe a threshold to be used by certain methods, such as rank() and solve(), * which need to determine when singular values are to be considered nonzero. * This is not used for the SVD decomposition itself. @@ -193,7 +193,7 @@ public: inline Index rows() const { return m_rows; } inline Index cols() const { return m_cols; } - + /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A. * * \param b the right-hand-side of the equation to solve. @@ -211,7 +211,7 @@ public: eigen_assert(computeU() && computeV() && "SVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice)."); return Solve(derived(), b.derived()); } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC @@ -219,12 +219,12 @@ public: #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + // return true if already allocated bool allocate(Index rows, Index cols, unsigned int computationOptions) ; @@ -258,7 +258,7 @@ protected: #ifndef EIGEN_PARSED_BY_DOXYGEN template template -void SVDBase::_solve_impl(const RhsType &rhs, DstType &dst) const +EIGEN_DEVICE_FUNC void SVDBase::_solve_impl(const RhsType &rhs, DstType &dst) const { eigen_assert(rhs.rows() == rows()); diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 18352a847..00646a223 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -10,9 +10,9 @@ #ifndef EIGEN_SPARSEASSIGN_H #define EIGEN_SPARSEASSIGN_H -namespace Eigen { +namespace Eigen { -template +template template Derived& SparseMatrixBase::operator=(const EigenBase &other) { @@ -104,7 +104,7 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src) enum { Flip = (DstEvaluatorType::Flags & RowMajorBit) != (SrcEvaluatorType::Flags & RowMajorBit) }; - + DstXprType temp(src.rows(), src.cols()); temp.reserve((std::max)(src.rows(),src.cols())*2); @@ -127,7 +127,7 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src) template< typename DstXprType, typename SrcXprType, typename Functor> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { assign_sparse_to_sparse(dst.derived(), src.derived()); } @@ -137,15 +137,15 @@ struct Assignment template< typename DstXprType, typename SrcXprType, typename Functor> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { if(internal::is_same >::value) dst.setZero(); - + internal::evaluator srcEval(src); resize_if_allowed(dst, src, func); internal::evaluator dstEval(dst); - + const Index outerEvaluationSize = (internal::evaluator::Flags&RowMajorBit) ? src.rows() : src.cols(); for (Index j=0; j::InnerIterator i(srcEval,j); i; ++i) @@ -159,7 +159,7 @@ template, internal::assign_op, Sparse2Sparse> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); Index dstCols = src.cols(); @@ -182,7 +182,7 @@ struct Assignment typedef Array ArrayXI; typedef Array ArrayXS; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_DEVICE_FUNC void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { Index dstRows = src.rows(); Index dstCols = src.cols(); @@ -196,16 +196,16 @@ struct Assignment Map(dst.outerIndexPtr(), size+1).setLinSpaced(0,StorageIndex(size)); Map(dst.valuePtr(), size) = src.diagonal(); } - + template static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.diagonal() = src.diagonal(); } - + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index 4cbf68781..0e12e8fb2 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -10,7 +10,7 @@ #ifndef EIGEN_SPARSEPRODUCT_H #define EIGEN_SPARSEPRODUCT_H -namespace Eigen { +namespace Eigen { /** \returns an expression of the product of two sparse matrices. * By default a conservative product preserving the symbolic non zeros is performed. @@ -102,13 +102,13 @@ template< typename DstXprType, typename Lhs, typename Rhs> struct Assignment, internal::assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { Index dstRows = src.rows(); Index dstCols = src.cols(); if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) dst.resize(dstRows, dstCols); - + generic_product_impl::evalTo(dst,src.lhs(),src.rhs()); } }; @@ -118,7 +118,7 @@ template< typename DstXprType, typename Lhs, typename Rhs> struct Assignment, internal::add_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { generic_product_impl::addTo(dst,src.lhs(),src.rhs()); } @@ -129,7 +129,7 @@ template< typename DstXprType, typename Lhs, typename Rhs> struct Assignment, internal::sub_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { generic_product_impl::subTo(dst,src.lhs(),src.rhs()); } diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 76117a010..f703f8cd6 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -10,8 +10,8 @@ #ifndef EIGEN_SPARSE_SELFADJOINTVIEW_H #define EIGEN_SPARSE_SELFADJOINTVIEW_H -namespace Eigen { - +namespace Eigen { + /** \ingroup SparseCore_Module * \class SparseSelfAdjointView * @@ -27,7 +27,7 @@ namespace Eigen { * \sa SparseMatrixBase::selfadjointView() */ namespace internal { - + template struct traits > : traits { }; @@ -44,7 +44,7 @@ template class SparseSelfAdjointView : public EigenBase > { public: - + enum { Mode = _Mode, TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0), @@ -58,7 +58,7 @@ template class SparseSelfAdjointView typedef Matrix VectorI; typedef typename internal::ref_selector::non_const_type MatrixTypeNested; typedef typename internal::remove_all::type _MatrixTypeNested; - + explicit inline SparseSelfAdjointView(MatrixType& matrix) : m_matrix(matrix) { eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices"); @@ -94,7 +94,7 @@ template class SparseSelfAdjointView { return Product(lhs.derived(), rhs); } - + /** Efficient sparse self-adjoint matrix times dense vector/matrix product */ template Product @@ -121,7 +121,7 @@ template class SparseSelfAdjointView */ template SparseSelfAdjointView& rankUpdate(const SparseMatrixBase& u, const Scalar& alpha = Scalar(1)); - + /** \returns an expression of P H P^-1 */ // TODO implement twists in a more evaluator friendly fashion SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix& perm) const @@ -148,7 +148,7 @@ template class SparseSelfAdjointView PermutationMatrix pnull; return *this = src.twistedBy(pnull); } - + void resize(Index rows, Index cols) { EIGEN_ONLY_USED_FOR_DEBUG(rows); @@ -156,7 +156,7 @@ template class SparseSelfAdjointView eigen_assert(rows == this->rows() && cols == this->cols() && "SparseSelfadjointView::resize() does not actually allow to resize."); } - + protected: MatrixTypeNested m_matrix; @@ -203,7 +203,7 @@ SparseSelfAdjointView::rankUpdate(const SparseMatrixBase // in the future selfadjoint-ness should be defined by the expression traits // such that Transpose > is valid. (currently TriangularBase::transpose() is overloaded to make it work) @@ -226,7 +226,7 @@ struct Assignment typedef internal::assign_op AssignOpType; template - static void run(SparseMatrix &dst, const SrcXprType &src, const AssignOpType&/*func*/) + static EIGEN_DEVICE_FUNC void run(SparseMatrix &dst, const SrcXprType &src, const AssignOpType&/*func*/) { internal::permute_symm_to_fullsymm(src.matrix(), dst); } @@ -257,7 +257,7 @@ struct Assignment run(tmp, src, AssignOpType()); dst -= tmp; } - + template static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const AssignOpType&/*func*/) { @@ -280,13 +280,13 @@ template::type SparseLhsTypeNested; typedef typename internal::remove_all::type SparseLhsTypeNestedCleaned; typedef evaluator LhsEval; typedef typename LhsEval::InnerIterator LhsIterator; typedef typename SparseLhsType::Scalar LhsScalar; - + enum { LhsIsRowMajor = (LhsEval::Flags&RowMajorBit)==RowMajorBit, ProcessFirstHalf = @@ -295,7 +295,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons || ( (Mode&Lower) && LhsIsRowMajor), ProcessSecondHalf = !ProcessFirstHalf }; - + SparseLhsTypeNested lhs_nested(lhs); LhsEval lhsEval(lhs_nested); @@ -349,7 +349,7 @@ struct generic_product_impl::type RhsNested; LhsNested lhsNested(lhsView.matrix()); RhsNested rhsNested(rhs); - + internal::sparse_selfadjoint_time_dense_product(lhsNested, rhsNested, dst, alpha); } }; @@ -366,7 +366,7 @@ struct generic_product_impl::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhsView.matrix()); - + // transpose everything Transpose dstT(dst); internal::sparse_selfadjoint_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); @@ -390,7 +390,7 @@ struct product_evaluator, ProductTag, Spar ::new (static_cast(this)) Base(m_result); generic_product_impl::evalTo(m_result, m_lhs, xpr.rhs()); } - + protected: typename Rhs::PlainObject m_lhs; PlainObject m_result; @@ -410,7 +410,7 @@ struct product_evaluator, ProductTag, Spar ::new (static_cast(this)) Base(m_result); generic_product_impl::evalTo(m_result, xpr.lhs(), m_rhs); } - + protected: typename Lhs::PlainObject m_rhs; PlainObject m_result; @@ -432,13 +432,13 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix VectorI; typedef evaluator MatEval; typedef typename evaluator::InnerIterator MatIterator; - + MatEval matEval(mat); Dest& dest(_dest.derived()); enum { StorageOrderMatch = int(Dest::IsRowMajor) == int(MatrixType::IsRowMajor) }; - + Index size = mat.rows(); VectorI count; count.resize(size); @@ -465,7 +465,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix(it.index()); Index r = it.row(); Index c = it.col(); - + StorageIndex jp = perm ? perm[j] : j; StorageIndex ip = perm ? perm[i] : i; - + if(Mode==int(Upper|Lower)) { Index k = count[StorageOrderMatch ? jp : ip]++; @@ -531,7 +531,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrixj)) continue; - + StorageIndex ip = perm ? perm[i] : i; count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++; } @@ -555,22 +555,22 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrixj)) continue; - + StorageIndex jp = perm ? perm[j] : j; StorageIndex ip = perm? perm[i] : i; - + Index k = count[int(DstMode)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++; dest.innerIndexPtr()[k] = int(DstMode)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp); - + if(!StorageOrderMatch) std::swap(ip,jp); if( ((int(DstMode)==int(Lower) && ipjp))) dest.valuePtr()[k] = numext::conj(it.value()); @@ -609,17 +609,17 @@ class SparseSymmetricPermutationProduct typedef Matrix VectorI; typedef typename MatrixType::Nested MatrixTypeNested; typedef typename internal::remove_all::type NestedExpression; - + SparseSymmetricPermutationProduct(const MatrixType& mat, const Perm& perm) : m_matrix(mat), m_perm(perm) {} - + inline Index rows() const { return m_matrix.rows(); } inline Index cols() const { return m_matrix.cols(); } - + const NestedExpression& matrix() const { return m_matrix; } const Perm& perm() const { return m_perm; } - + protected: MatrixTypeNested m_matrix; const Perm& m_perm; @@ -627,21 +627,21 @@ class SparseSymmetricPermutationProduct }; namespace internal { - + template struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseSymmetricPermutationProduct SrcXprType; typedef typename DstXprType::StorageIndex DstIndex; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) + static EIGEN_DEVICE_FUNC void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) { // internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); SparseMatrix tmp; internal::permute_symm_to_fullsymm(src.matrix(),tmp,src.perm().indices().data()); dst = tmp; } - + template static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) { diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index 7409fcae9..c8d703d2d 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -42,31 +42,31 @@ namespace internal { * \ingroup SparseQR_Module * \class SparseQR * \brief Sparse left-looking rank-revealing QR factorization - * - * This class implements a left-looking rank-revealing QR decomposition + * + * This class implements a left-looking rank-revealing QR decomposition * of sparse matrices. When a column has a norm less than a given tolerance - * it is implicitly permuted to the end. The QR factorization thus obtained is - * given by A*P = Q*R where R is upper triangular or trapezoidal. - * + * it is implicitly permuted to the end. The QR factorization thus obtained is + * given by A*P = Q*R where R is upper triangular or trapezoidal. + * * P is the column permutation which is the product of the fill-reducing and the * rank-revealing permutations. Use colsPermutation() to get it. - * - * Q is the orthogonal matrix represented as products of Householder reflectors. + * + * Q is the orthogonal matrix represented as products of Householder reflectors. * Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint. * You can then apply it to a vector. - * + * * R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient. * matrixR().topLeftCorner(rank(), rank()) always returns a triangular factor of full rank. - * + * * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> - * \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module + * \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module * OrderingMethods \endlink module for the list of built-in and external ordering methods. - * + * * \implsparsesolverconcept * * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). * \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix. - * + * */ template class SparseQR : public SparseSolverBase > @@ -90,26 +90,26 @@ class SparseQR : public SparseSolverBase > ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - + public: SparseQR () : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) { } - + /** Construct a QR factorization of the matrix \a mat. - * + * * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()). - * + * * \sa compute() */ explicit SparseQR(const MatrixType& mat) : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) { compute(mat); } - + /** Computes the QR factorization of the sparse matrix \a mat. - * + * * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()). - * + * * \sa analyzePattern(), factorize() */ void compute(const MatrixType& mat) @@ -119,15 +119,15 @@ class SparseQR : public SparseSolverBase > } void analyzePattern(const MatrixType& mat); void factorize(const MatrixType& mat); - - /** \returns the number of rows of the represented matrix. + + /** \returns the number of rows of the represented matrix. */ inline Index rows() const { return m_pmat.rows(); } - - /** \returns the number of columns of the represented matrix. + + /** \returns the number of columns of the represented matrix. */ inline Index cols() const { return m_pmat.cols();} - + /** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization. * \warning The entries of the returned matrix are not sorted. This means that using it in algorithms * expecting sorted entries will fail. This include random coefficient accesses (SpaseMatrix::coeff()), @@ -142,7 +142,7 @@ class SparseQR : public SparseSolverBase > * \endcode */ const QRMatrixType& matrixR() const { return m_R; } - + /** \returns the number of non linearly dependent columns as determined by the pivoting threshold. * * \sa setPivotThreshold() @@ -150,9 +150,9 @@ class SparseQR : public SparseSolverBase > Index rank() const { eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); - return m_nonzeropivots; + return m_nonzeropivots; } - + /** \returns an expression of the matrix Q as products of sparse Householder reflectors. * The common usage of this function is to apply it to a dense matrix or vector * \code @@ -171,23 +171,23 @@ class SparseQR : public SparseSolverBase > * reflectors are stored unsorted, two transpositions are needed to sort * them before performing the product. */ - SparseQRMatrixQReturnType matrixQ() const + SparseQRMatrixQReturnType matrixQ() const { return SparseQRMatrixQReturnType(*this); } - + /** \returns a const reference to the column permutation P that was applied to A such that A*P = Q*R * It is the combination of the fill-in reducing permutation and numerical column pivoting. */ const PermutationType& colsPermutation() const - { + { eigen_assert(m_isInitialized && "Decomposition is not initialized."); return m_outputPerm_c; } - + /** \returns A string describing the type of error. * This method is provided to ease debugging, not to handle errors. */ std::string lastErrorMessage() const { return m_lastError; } - + /** \internal */ template bool _solve_impl(const MatrixBase &B, MatrixBase &dest) const @@ -196,21 +196,21 @@ class SparseQR : public SparseSolverBase > eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); Index rank = this->rank(); - + // Compute Q^* * b; typename Dest::PlainObject y, b; y = this->matrixQ().adjoint() * B; b = y; - + // Solve with the triangular matrix R y.resize((std::max)(cols(),y.rows()),y.cols()); y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView().solve(b.topRows(rank)); y.bottomRows(y.rows()-rank).setZero(); - + // Apply the column permutation if (m_perm_c.size()) dest = colsPermutation() * y.topRows(cols()); else dest = y.topRows(cols()); - + m_info = Success; return true; } @@ -225,13 +225,13 @@ class SparseQR : public SparseSolverBase > m_useDefaultThreshold = false; m_threshold = threshold; } - + /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. * * \sa compute() */ template - inline const Solve solve(const MatrixBase& B) const + inline const Solve solve(const MatrixBase& B) const { eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); @@ -244,14 +244,14 @@ class SparseQR : public SparseSolverBase > eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); return Solve(*this, B.derived()); } - + /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was successful, * \c NumericalIssue if the QR factorization reports a numerical problem * \c InvalidInput if the input matrix is invalid * - * \sa iparm() + * \sa iparm() */ ComputationInfo info() const { @@ -270,7 +270,7 @@ class SparseQR : public SparseSolverBase > this->m_isQSorted = true; } - + protected: bool m_analysisIsok; bool m_factorizationIsok; @@ -290,18 +290,18 @@ class SparseQR : public SparseSolverBase > IndexVector m_firstRowElt; // First element in each row bool m_isQSorted; // whether Q is sorted or not bool m_isEtreeOk; // whether the elimination tree match the initial input matrix - + template friend struct SparseQR_QProduct; - + }; -/** \brief Preprocessing step of a QR factorization - * +/** \brief Preprocessing step of a QR factorization + * * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()). - * + * * In this step, the fill-reducing permutation is computed and applied to the columns of A * and the column elimination tree is computed as well. Only the sparsity pattern of \a mat is exploited. - * + * * \note In this step it is assumed that there is no empty row in the matrix \a mat. */ template @@ -311,26 +311,26 @@ void SparseQR::analyzePattern(const MatrixType& mat) // Copy to a column major matrix if the input is rowmajor typename internal::conditional::type matCpy(mat); // Compute the column fill reducing ordering - OrderingType ord; - ord(matCpy, m_perm_c); + OrderingType ord; + ord(matCpy, m_perm_c); Index n = mat.cols(); Index m = mat.rows(); Index diagSize = (std::min)(m,n); - + if (!m_perm_c.size()) { m_perm_c.resize(n); m_perm_c.indices().setLinSpaced(n, 0,StorageIndex(n-1)); } - + // Compute the column elimination tree of the permuted matrix m_outputPerm_c = m_perm_c.inverse(); internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); m_isEtreeOk = true; - + m_R.resize(m, n); m_Q.resize(m, diagSize); - + // Allocate space for nonzero elements : rough estimation m_R.reserve(2*mat.nonZeros()); //FIXME Get a more accurate estimation through symbolic factorization with the etree m_Q.reserve(2*mat.nonZeros()); @@ -339,17 +339,17 @@ void SparseQR::analyzePattern(const MatrixType& mat) } /** \brief Performs the numerical QR factorization of the input matrix - * + * * The function SparseQR::analyzePattern(const MatrixType&) must have been called beforehand with * a matrix having the same sparsity pattern than \a mat. - * + * * \param mat The sparse column-major matrix */ template void SparseQR::factorize(const MatrixType& mat) { using std::abs; - + eigen_assert(m_analysisIsok && "analyzePattern() should be called before this step"); StorageIndex m = StorageIndex(mat.rows()); StorageIndex n = StorageIndex(mat.cols()); @@ -359,7 +359,7 @@ void SparseQR::factorize(const MatrixType& mat) Index nzcolR, nzcolQ; // Number of nonzero for the current column of R and Q ScalarVector tval(m); // The dense vector used to compute the current column RealScalar pivotThreshold = m_threshold; - + m_R.setZero(); m_Q.setZero(); m_pmat = mat; @@ -371,12 +371,12 @@ void SparseQR::factorize(const MatrixType& mat) } m_pmat.uncompress(); // To have the innerNonZeroPtr allocated - + // Apply the fill-in reducing permutation lazily: { // If the input is row major, copy the original column indices, // otherwise directly use the input matrix - // + // IndexVector originalOuterIndicesCpy; const StorageIndex *originalOuterIndices = mat.outerIndexPtr(); if(MatrixType::IsRowMajor) @@ -384,20 +384,20 @@ void SparseQR::factorize(const MatrixType& mat) originalOuterIndicesCpy = IndexVector::Map(m_pmat.outerIndexPtr(),n+1); originalOuterIndices = originalOuterIndicesCpy.data(); } - + for (int i = 0; i < n; i++) { Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i; - m_pmat.outerIndexPtr()[p] = originalOuterIndices[i]; - m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i]; + m_pmat.outerIndexPtr()[p] = originalOuterIndices[i]; + m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i]; } } - + /* Compute the default threshold as in MatLab, see: * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing - * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3 + * Sparse QR Factorization, ACM Trans. on Math. Soft. 38(1), 2011, Page 8:3 */ - if(m_useDefaultThreshold) + if(m_useDefaultThreshold) { RealScalar max2Norm = 0.0; for (int j = 0; j < n; j++) max2Norm = numext::maxi(max2Norm, m_pmat.col(j).norm()); @@ -405,10 +405,10 @@ void SparseQR::factorize(const MatrixType& mat) max2Norm = RealScalar(1); pivotThreshold = 20 * (m + n) * max2Norm * NumTraits::epsilon(); } - + // Initialize the numerical permutation m_pivotperm.setIdentity(n); - + StorageIndex nonzeroCol = 0; // Record the number of valid pivots m_Q.startVec(0); @@ -421,8 +421,8 @@ void SparseQR::factorize(const MatrixType& mat) Qidx(0) = nonzeroCol; nzcolR = 0; nzcolQ = 1; bool found_diag = nonzeroCol>=m; - tval.setZero(); - + tval.setZero(); + // Symbolic factorization: find the nonzero locations of the column k of the factors R and Q, i.e., // all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k. // Note: if the diagonal entry does not exist, then its contribution must be explicitly added, @@ -432,7 +432,7 @@ void SparseQR::factorize(const MatrixType& mat) StorageIndex curIdx = nonzeroCol; if(itp) curIdx = StorageIndex(itp.row()); if(curIdx == nonzeroCol) found_diag = true; - + // Get the nonzeros indexes of the current column of R StorageIndex st = m_firstRowElt(curIdx); // The traversal of the etree starts here if (st < 0 ) @@ -442,7 +442,7 @@ void SparseQR::factorize(const MatrixType& mat) return; } - // Traverse the etree + // Traverse the etree Index bi = nzcolR; for (; mark(st) != col; st = m_etree(st)) { @@ -454,13 +454,13 @@ void SparseQR::factorize(const MatrixType& mat) // Reverse the list to get the topological ordering Index nt = nzcolR-bi; for(Index i = 0; i < nt/2; i++) std::swap(Ridx(bi+i), Ridx(nzcolR-i-1)); - + // Copy the current (curIdx,pcol) value of the input matrix if(itp) tval(curIdx) = itp.value(); else tval(curIdx) = Scalar(0); - + // Compute the pattern of Q(:,k) - if(curIdx > nonzeroCol && mark(curIdx) != col ) + if(curIdx > nonzeroCol && mark(curIdx) != col ) { Qidx(nzcolQ) = curIdx; // Add this row to the pattern of Q, mark(curIdx) = col; // and mark it as visited @@ -472,15 +472,15 @@ void SparseQR::factorize(const MatrixType& mat) for (Index i = nzcolR-1; i >= 0; i--) { Index curIdx = Ridx(i); - + // Apply the curIdx-th householder vector to the current column (temporarily stored into tval) Scalar tdot(0); - + // First compute q' * tval tdot = m_Q.col(curIdx).dot(tval); tdot *= m_hcoeffs(curIdx); - + // Then update tval = tval - q * tau // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense ?= sparse") for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq) @@ -500,16 +500,16 @@ void SparseQR::factorize(const MatrixType& mat) } } } // End update current column - + Scalar tau = RealScalar(0); RealScalar beta = 0; - + if(nonzeroCol < diagSize) { // Compute the Householder reflection that eliminate the current column // FIXME this step should call the Householder module. Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0); - + // First, the squared norm of Q((col+1):m, col) RealScalar sqrNorm = 0.; for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq))); @@ -528,7 +528,7 @@ void SparseQR::factorize(const MatrixType& mat) for (Index itq = 1; itq < nzcolQ; ++itq) tval(Qidx(itq)) /= (c0 - beta); tau = numext::conj((beta-c0) / beta); - + } } @@ -536,7 +536,7 @@ void SparseQR::factorize(const MatrixType& mat) for (Index i = nzcolR-1; i >= 0; i--) { Index curIdx = Ridx(i); - if(curIdx < nonzeroCol) + if(curIdx < nonzeroCol) { m_R.insertBackByOuterInnerUnordered(col, curIdx) = tval(curIdx); tval(curIdx) = Scalar(0.); @@ -562,17 +562,17 @@ void SparseQR::factorize(const MatrixType& mat) else { // Zero pivot found: move implicitly this column to the end - for (Index j = nonzeroCol; j < n-1; j++) + for (Index j = nonzeroCol; j < n-1; j++) std::swap(m_pivotperm.indices()(j), m_pivotperm.indices()[j+1]); - + // Recompute the column elimination tree internal::coletree(m_pmat, m_etree, m_firstRowElt, m_pivotperm.indices().data()); m_isEtreeOk = false; } } - + m_hcoeffs.tail(diagSize-nonzeroCol).setZero(); - + // Finalize the column pointers of the sparse matrices R and Q m_Q.finalize(); m_Q.makeCompressed(); @@ -581,18 +581,18 @@ void SparseQR::factorize(const MatrixType& mat) m_isQSorted = false; m_nonzeropivots = nonzeroCol; - + if(nonzeroCol void evalTo(DesType& res) const @@ -651,7 +651,7 @@ struct SparseQR_QProduct : ReturnByValue struct SparseQRMatrixQReturnType : public EigenBase > -{ +{ typedef typename SparseQRType::Scalar Scalar; typedef Matrix DenseMatrix; enum { @@ -701,7 +701,7 @@ struct SparseQRMatrixQTransposeReturnType }; namespace internal { - + template struct evaluator_traits > { @@ -716,7 +716,7 @@ struct Assignment, internal: typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { typename DstXprType::PlainObject idMat(src.rows(), src.cols()); idMat.setIdentity(); @@ -732,7 +732,7 @@ struct Assignment, internal: typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_DEVICE_FUNC void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows()); } diff --git a/cmake/EigenConfigureTesting.cmake b/cmake/EigenConfigureTesting.cmake index 3a824397f..634512a40 100644 --- a/cmake/EigenConfigureTesting.cmake +++ b/cmake/EigenConfigureTesting.cmake @@ -1,7 +1,7 @@ include(EigenTesting) include(CheckCXXSourceCompiles) -# configure the "site" and "buildname" +# configure the "site" and "buildname" ei_set_sitename() # retrieve and store the build string @@ -11,6 +11,15 @@ add_custom_target(buildtests) add_custom_target(check COMMAND "ctest") add_dependencies(check buildtests) +# Convenience target for only building GPU tests. +add_custom_target(buildtests_gpu) +add_custom_target(check_gpu COMMAND "ctest" "--output-on-failure" + "--no-compress-output" + "--build-no-clean" + "-T" "test" + "-L" "gpu") +add_dependencies(check_gpu buildtests_gpu) + # check whether /bin/bash exists (disabled as not used anymore) # find_file(EIGEN_BIN_BASH_EXISTS "/bin/bash" PATHS "/" NO_DEFAULT_PATH) @@ -50,7 +59,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/test/") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS}") endif(EIGEN_COVERAGE_TESTING) - + elseif(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS") endif(CMAKE_COMPILER_IS_GNUCXX) diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index d7056a6ac..a9696abe7 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -18,7 +18,9 @@ macro(ei_add_test_internal testname testname_with_suffix) set(filename ${testname}.cpp) endif() + set(is_gpu_test OFF) if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) + set(is_gpu_test ON) if(EIGEN_TEST_CUDA_CLANG) set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX) if(CUDA_64_BIT_DEVICE_CODE) @@ -48,6 +50,9 @@ macro(ei_add_test_internal testname testname_with_suffix) else() add_dependencies(buildtests ${targetname}) endif() + if (is_gpu_test) + add_dependencies(buildtests_gpu ${targetname}) + endif() if(EIGEN_NO_ASSERTION_CHECKING) ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1") @@ -98,6 +103,10 @@ macro(ei_add_test_internal testname testname_with_suffix) endif() add_test(${testname_with_suffix} "${targetname}") + if (is_gpu_test) + # Add gpu tag for testing only GPU tests. + set_property(TEST ${testname_with_suffix} APPEND PROPERTY LABELS "gpu") + endif() # Specify target and test labels accoirding to EIGEN_CURRENT_SUBPROJECT get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 50a2db708..81d79f949 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -362,14 +362,35 @@ if(EIGEN_TEST_CUDA) find_package(CUDA 5.0) if(CUDA_FOUND) - set(CUDA_PROPAGATE_HOST_FLAGS OFF) - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) + if( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3)) + string(APPEND EIGEN_CUDA_CXX11_FLAGS " -std=c++11") endif() + if(EIGEN_TEST_CUDA_CLANG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30") + string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") + foreach(GPU IN LISTS EIGEN_CUDA_COMPUTE_ARCH) + string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${GPU}") + endforeach() + string(APPEND CMAKE_CXX_FLAGS " ${EIGEN_CUDA_CXX_FLAGS}") + else() + set(CUDA_PROPAGATE_HOST_FLAGS OFF) + set(NVCC_ARCH_FLAGS) + # Define an -arch=sm_, otherwise if GPU does not exactly match one of + # those in the arch list for -gencode, the kernels will fail to run with + # cudaErrorNoKernelImageForDevice + # This can happen with newer cards (e.g. sm_75) and compiling with older + # versions of nvcc (e.g. 9.2) that do not support their specific arch. + list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE) + if(EIGEN_CUDA_COMPUTE_ARCH_SIZE) + list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT) + set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}") + endif() + foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH) + string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") + endforeach() + set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}") + cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") endif() - cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR}) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") ei_add_test(cuda_basic) diff --git a/test/cuda_common.h b/test/cuda_common.h index 9737693ac..895911128 100644 --- a/test/cuda_common.h +++ b/test/cuda_common.h @@ -37,26 +37,26 @@ void run_on_cuda(const Kernel& ker, int n, const Input& in, Output& out) typename Output::Scalar* d_out; std::ptrdiff_t in_bytes = in.size() * sizeof(typename Input::Scalar); std::ptrdiff_t out_bytes = out.size() * sizeof(typename Output::Scalar); - + cudaMalloc((void**)(&d_in), in_bytes); cudaMalloc((void**)(&d_out), out_bytes); - + cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_out, out.data(), out_bytes, cudaMemcpyHostToDevice); - + // Simple and non-optimal 1D mapping assuming n is not too large // That's only for unit testing! dim3 Blocks(128); dim3 Grids( (n+int(Blocks.x)-1)/int(Blocks.x) ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); run_on_cuda_meta_kernel<<>>(ker, n, d_in, d_out); - cudaThreadSynchronize(); - + cudaDeviceSynchronize(); + // check inputs have not been modified cudaMemcpy(const_cast(in.data()), d_in, in_bytes, cudaMemcpyDeviceToHost); cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost); - + cudaFree(d_in); cudaFree(d_out); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 172a8b5ca..a96e59f4c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -678,15 +678,15 @@ struct TensorEvaluator, template friend struct internal::FullReducerShard; #endif #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); + template friend __global__ void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); #ifdef EIGEN_HAS_CUDA_FP16 - template friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); - template friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); - template friend void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*); + template friend __global__ void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); + template friend __global__ void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); + template friend __global__ void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*); #endif - template friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); + template friend __global__ void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); - template friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); + template friend __global__ void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); #endif template friend struct internal::InnerReducer; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 65638b6a8..62c4a766d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -168,7 +168,12 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num #pragma unroll for (int offset = warpSize/2; offset > 0; offset /= 2) { +#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000 + reducer.reduce(__shfl_down(accum, offset, warpSize), &accum); + #else + reducer.reduce(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum); +#endif } if ((threadIdx.x & (warpSize - 1)) == 0) { @@ -244,7 +249,11 @@ __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input, #pragma unroll for (int offset = warpSize/2; offset > 0; offset /= 2) { +#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000 reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum); +#else + reducer.reducePacket(__shfl_down_sync(0xFFFFFFFF, accum, offset, warpSize), &accum); +#endif } if ((threadIdx.x & (warpSize - 1)) == 0) { @@ -426,7 +435,11 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu #pragma unroll for (int offset = warpSize/2; offset > 0; offset /= 2) { +#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000 reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val); +#else + reducer.reduce(__shfl_down_sync(0xFFFFFFFF, reduced_val, offset), &reduced_val); +#endif } if ((threadIdx.x & (warpSize - 1)) == 0) { @@ -516,8 +529,15 @@ __global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, #pragma unroll for (int offset = warpSize/2; offset > 0; offset /= 2) { +#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000 + reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1); reducer.reducePacket(__shfl_down(reduced_val2, offset, warpSize), &reduced_val2); +#else + reducer.reducePacket(__shfl_down_sync(0xFFFFFFFF, reduced_val1, offset, warpSize), &reduced_val1); + reducer.reducePacket(__shfl_down_sync(0xFFFFFFFF, reduced_val2, offset, warpSize), &reduced_val2); + +#endif } half val1 = __low2half(reduced_val1); diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h index 46d60d323..0e57bf7f4 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h @@ -15,27 +15,27 @@ namespace Eigen { namespace internal { /** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } /** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } /** \internal \returns the zeta function of two arguments (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); } /** \internal \returns the polygamma function (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); } /** \internal \returns the erf(\a a) (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perf(const Packet& a) { using numext::erf; return erf(a); } /** \internal \returns the erfc(\a a) (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } /** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index aa86aa0b1..17f9f61bc 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -216,17 +216,14 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS}) if( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3)) - set(EIGEN_CUDA_CXX11_FLAG "-std=c++11") - else() - # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11) - set(EIGEN_CUDA_CXX11_FLAG "") + string(APPEND EIGEN_CUDA_CXX11_FLAGS " -std=c++11") endif() if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) endif() if(EIGEN_TEST_CUDA_CLANG) - string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR} ${EIGEN_CUDA_CXX11_FLAG}") + string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR} ${EIGEN_CUDA_CXX11_FLAGS}") foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH) string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${ARCH}") endforeach() @@ -246,7 +243,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH) string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") endforeach() - set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX11_FLAG}") + set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX11_FLAGS}") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") endif()