diff --git a/.hgignore b/.hgignore index e33ba2e9d..769a47f1f 100644 --- a/.hgignore +++ b/.hgignore @@ -30,3 +30,5 @@ log patch a a.* +lapack/testing +lapack/reference diff --git a/CMakeLists.txt b/CMakeLists.txt index 96d6c8701..00287c9bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ project(Eigen) -cmake_minimum_required(VERSION 2.8.2) +cmake_minimum_required(VERSION 2.8.4) # guard against in-source builds @@ -219,12 +219,26 @@ if(NOT MSVC) message(STATUS "Enabling AltiVec in tests/examples") endif() + option(EIGEN_TEST_VSX "Enable/Disable VSX in tests/examples" OFF) + if(EIGEN_TEST_VSX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -mvsx") + message(STATUS "Enabling VSX in tests/examples") + endif() + option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) if(EIGEN_TEST_NEON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8") message(STATUS "Enabling NEON in tests/examples") endif() + option(EIGEN_TEST_NEON64 "Enable/Disable Neon in tests/examples" OFF) + if(EIGEN_TEST_NEON64) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + message(STATUS "Enabling NEON in tests/examples") + endif() + + + check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP) if(COMPILER_SUPPORT_OPENMP) option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) @@ -434,6 +448,7 @@ if(cmake_generator_tolower MATCHES "makefile") message(STATUS "make check | Build and run the unit-tests. 
Read this page:") message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests") message(STATUS "make blas | Build BLAS library (not the same thing as Eigen)") + message(STATUS "make uninstall| Removes files installed by make install") message(STATUS "--------------+--------------------------------------------------------------") else() message(STATUS "To build/run the unit tests, read this page:") @@ -469,3 +484,7 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake DESTINATION ${EIGEN_CONFIG_CMAKE_PATH} ) + +# Add uninstall target +add_custom_target ( uninstall + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake) diff --git a/Eigen/Cholesky b/Eigen/Cholesky index 7314d326c..dd0ca911c 100644 --- a/Eigen/Cholesky +++ b/Eigen/Cholesky @@ -21,7 +21,6 @@ * \endcode */ -#include "src/misc/Solve.h" #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" #ifdef EIGEN_USE_LAPACKE diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport index 745b884e7..687cd9777 100644 --- a/Eigen/CholmodSupport +++ b/Eigen/CholmodSupport @@ -33,12 +33,8 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/CholmodSupport/CholmodSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_CHOLMODSUPPORT_MODULE_H diff --git a/Eigen/Core b/Eigen/Core index acdeca5f4..0263caf20 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -57,7 +57,7 @@ // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. 
-#if defined(__MINGW32__) && EIGEN_GNUC_AT_LEAST(4,6) +#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) #pragma GCC optimize ("-fno-ipa-cp-clone") #endif @@ -75,18 +75,18 @@ #endif #endif -#ifdef _MSC_VER +#if EIGEN_COMP_MSVC #include // for _aligned_malloc -- need it regardless of whether vectorization is enabled - #if (_MSC_VER >= 1500) // 2008 or later + #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later // Remember that usage of defined() in a #define is undefined by the standard. // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. - #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64) + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER #endif #endif #else // Remember that usage of defined() in a #define is undefined by the standard - #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) ) + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC #endif #endif @@ -125,9 +125,7 @@ #define EIGEN_VECTORIZE_SSE4_1 #define EIGEN_VECTORIZE_SSE4_2 #endif - #ifdef __FMA__ - #define EIGEN_VECTORIZE_FMA - #endif + // include files // This extern "C" works around a MINGW-w64 compilation issue @@ -140,7 +138,7 @@ extern "C" { // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. // Doing so triggers some issues with ICC. 
However old gcc versions seems to not have this file, thus: - #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1110 + #if EIGEN_COMP_ICC >= 1110 #include #else #include @@ -162,6 +160,15 @@ #endif #endif } // end extern "C" + #elif defined __VSX__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel #elif defined __ALTIVEC__ #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_ALTIVEC @@ -171,7 +178,7 @@ #undef bool #undef vector #undef pixel - #elif defined __ARM_NEON__ + #elif defined __ARM_NEON #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_NEON #include @@ -192,7 +199,7 @@ #endif // MSVC for windows mobile does not have the errno.h file -#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION) +#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM #define EIGEN_HAS_ERRNO #endif @@ -218,7 +225,7 @@ #endif // required for __cpuid, needs to be included after cmath -#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE)) +#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE #include #endif @@ -240,6 +247,8 @@ inline static const char *SimdInstructionSetsInUse(void) { return "SSE, SSE2"; #elif defined(EIGEN_VECTORIZE_ALTIVEC) return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; #elif defined(EIGEN_VECTORIZE_NEON) return "ARM NEON"; #else @@ -271,8 +280,8 @@ using std::ptrdiff_t; */ #include "src/Core/util/Constants.h" -#include "src/Core/util/ForwardDeclarations.h" #include "src/Core/util/Meta.h" +#include "src/Core/util/ForwardDeclarations.h" #include "src/Core/util/StaticAssert.h" #include "src/Core/util/XprHelper.h" #include "src/Core/util/Memory.h" @@ -291,7 +300,7 @@ using std::ptrdiff_t; #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/Complex.h" -#elif 
defined EIGEN_VECTORIZE_ALTIVEC +#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/PacketMath.h" #include "src/Core/arch/AltiVec/Complex.h" #elif defined EIGEN_VECTORIZE_NEON @@ -310,19 +319,16 @@ using std::ptrdiff_t; #include "src/Core/functors/UnaryFunctors.h" #include "src/Core/functors/NullaryFunctors.h" #include "src/Core/functors/StlFunctors.h" +#include "src/Core/functors/AssignmentFunctors.h" #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" #include "src/Core/MatrixBase.h" #include "src/Core/EigenBase.h" -#ifdef EIGEN_ENABLE_EVALUATORS -#include "src/Core/functors/AssignmentFunctors.h" #include "src/Core/Product.h" #include "src/Core/CoreEvaluators.h" #include "src/Core/AssignEvaluator.h" -#include "src/Core/ProductEvaluators.h" -#endif #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 // at least confirmed with Doxygen 1.5.5 and 1.5.6 @@ -333,7 +339,10 @@ using std::ptrdiff_t; #include "src/Core/util/BlasUtil.h" #include "src/Core/DenseStorage.h" #include "src/Core/NestByValue.h" -#include "src/Core/ForceAlignedAccess.h" + +// #include "src/Core/ForceAlignedAccess.h" +// #include "src/Core/Flagged.h" + #include "src/Core/ReturnByValue.h" #include "src/Core/NoAlias.h" #include "src/Core/PlainObjectBase.h" @@ -346,12 +355,12 @@ using std::ptrdiff_t; #include "src/Core/SelfCwiseBinaryOp.h" #include "src/Core/Dot.h" #include "src/Core/StableNorm.h" -#include "src/Core/MapBase.h" #include "src/Core/Stride.h" +#include "src/Core/MapBase.h" #include "src/Core/Map.h" +#include "src/Core/Ref.h" #include "src/Core/Block.h" #include "src/Core/VectorBlock.h" -#include "src/Core/Ref.h" #include "src/Core/Transpose.h" #include "src/Core/DiagonalMatrix.h" #include "src/Core/Diagonal.h" @@ -364,14 +373,15 @@ using std::ptrdiff_t; #include "src/Core/IO.h" #include "src/Core/Swap.h" #include "src/Core/CommaInitializer.h" -#include "src/Core/Flagged.h" #include 
"src/Core/ProductBase.h" #include "src/Core/GeneralProduct.h" +#include "src/Core/Solve.h" +#include "src/Core/Inverse.h" #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/Parallelizer.h" -#include "src/Core/products/CoeffBasedProduct.h" +#include "src/Core/ProductEvaluators.h" #include "src/Core/products/GeneralMatrixVector.h" #include "src/Core/products/GeneralMatrixMatrix.h" #include "src/Core/SolveTriangular.h" diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers index 0f4159dc1..c06668bd2 100644 --- a/Eigen/IterativeLinearSolvers +++ b/Eigen/IterativeLinearSolvers @@ -26,9 +26,7 @@ * \endcode */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - +#include "src/IterativeLinearSolvers/SolveWithGuess.h" #include "src/IterativeLinearSolvers/IterativeSolverBase.h" #include "src/IterativeLinearSolvers/BasicPreconditioners.h" #include "src/IterativeLinearSolvers/ConjugateGradient.h" diff --git a/Eigen/LU b/Eigen/LU index 29a98cb9a..132ecc42c 100644 --- a/Eigen/LU +++ b/Eigen/LU @@ -16,7 +16,6 @@ * \endcode */ -#include "src/misc/Solve.h" #include "src/misc/Kernel.h" #include "src/misc/Image.h" #include "src/LU/FullPivLU.h" @@ -25,7 +24,7 @@ #include "src/LU/PartialPivLU_MKL.h" #endif #include "src/LU/Determinant.h" -#include "src/LU/Inverse.h" +#include "src/LU/InverseImpl.h" // Use the SSE optimized version whenever possible. 
At the moment the // SSE version doesn't compile when AVX is enabled diff --git a/Eigen/PaStiXSupport b/Eigen/PaStiXSupport index 7c616ee5e..e7d275f97 100644 --- a/Eigen/PaStiXSupport +++ b/Eigen/PaStiXSupport @@ -35,12 +35,8 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/PaStiXSupport/PaStiXSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/Eigen/QR b/Eigen/QR index 4c2533610..230cb079a 100644 --- a/Eigen/QR +++ b/Eigen/QR @@ -24,7 +24,6 @@ * \endcode */ -#include "src/misc/Solve.h" #include "src/QR/HouseholderQR.h" #include "src/QR/FullPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h" diff --git a/Eigen/SPQRSupport b/Eigen/SPQRSupport index 77016442e..e3f49bb5a 100644 --- a/Eigen/SPQRSupport +++ b/Eigen/SPQRSupport @@ -21,8 +21,6 @@ * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" #include "src/CholmodSupport/CholmodSupport.h" #include "src/SPQRSupport/SuiteSparseQRSupport.h" diff --git a/Eigen/SVD b/Eigen/SVD index c3d24286c..dbd37b17a 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -12,21 +12,25 @@ * * * This module provides SVD decomposition for matrices (both real and complex). - * This decomposition is accessible via the following MatrixBase method: + * Two decomposition algorithms are provided: + * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones. + * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems. 
+ * These decompositions are accessible via the respective classes and following MatrixBase methods: * - MatrixBase::jacobiSvd() + * - MatrixBase::bdcSvd() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" +#include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" +#include "src/SVD/BDCSVD.h" #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) #include "src/SVD/JacobiSVD_MKL.h" #endif -#include "src/SVD/UpperBidiagonalization.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/SparseCholesky b/Eigen/SparseCholesky index 9f5056aa1..b6a320c40 100644 --- a/Eigen/SparseCholesky +++ b/Eigen/SparseCholesky @@ -34,8 +34,6 @@ #error The SparseCholesky module has nothing to offer in MPL2 only mode #endif -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" #include "src/SparseCholesky/SimplicialCholesky.h" #ifndef EIGEN_MPL2_ONLY diff --git a/Eigen/SparseCore b/Eigen/SparseCore index 9b5be5e15..d5c0f6271 100644 --- a/Eigen/SparseCore +++ b/Eigen/SparseCore @@ -26,37 +26,32 @@ * This module depends on: Core. */ -namespace Eigen { - -/** The type used to identify a general sparse storage. 
*/ -struct Sparse {}; - -} - #include "src/SparseCore/SparseUtil.h" #include "src/SparseCore/SparseMatrixBase.h" +#include "src/SparseCore/SparseAssign.h" #include "src/SparseCore/CompressedStorage.h" #include "src/SparseCore/AmbiVector.h" #include "src/SparseCore/SparseMatrix.h" #include "src/SparseCore/MappedSparseMatrix.h" #include "src/SparseCore/SparseVector.h" -#include "src/SparseCore/SparseBlock.h" -#include "src/SparseCore/SparseTranspose.h" #include "src/SparseCore/SparseCwiseUnaryOp.h" #include "src/SparseCore/SparseCwiseBinaryOp.h" +#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseBlock.h" #include "src/SparseCore/SparseDot.h" -#include "src/SparseCore/SparsePermutation.h" #include "src/SparseCore/SparseRedux.h" -#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparseDiagonalProduct.h" #include "src/SparseCore/ConservativeSparseSparseProduct.h" #include "src/SparseCore/SparseSparseProductWithPruning.h" #include "src/SparseCore/SparseProduct.h" #include "src/SparseCore/SparseDenseProduct.h" -#include "src/SparseCore/SparseDiagonalProduct.h" -#include "src/SparseCore/SparseTriangularView.h" #include "src/SparseCore/SparseSelfAdjointView.h" +#include "src/SparseCore/SparseTriangularView.h" #include "src/SparseCore/TriangularSolver.h" -#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparsePermutation.h" +#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseSolverBase.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/SparseLU b/Eigen/SparseLU index 8527a49bd..38b38b531 100644 --- a/Eigen/SparseLU +++ b/Eigen/SparseLU @@ -20,9 +20,6 @@ * Please, see the documentation of the SparseLU class for more details. 
*/ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - // Ordering interface #include "OrderingMethods" diff --git a/Eigen/SparseQR b/Eigen/SparseQR index 4ee42065e..efb2695ba 100644 --- a/Eigen/SparseQR +++ b/Eigen/SparseQR @@ -21,9 +21,6 @@ * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "OrderingMethods" #include "src/SparseCore/SparseColEtree.h" #include "src/SparseQR/SparseQR.h" diff --git a/Eigen/StdDeque b/Eigen/StdDeque index f27234778..be3a7f82b 100644 --- a/Eigen/StdDeque +++ b/Eigen/StdDeque @@ -14,7 +14,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) diff --git a/Eigen/StdList b/Eigen/StdList index 225c1e18f..07ba1297b 100644 --- a/Eigen/StdList +++ b/Eigen/StdList @@ -13,7 +13,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) diff --git a/Eigen/StdVector b/Eigen/StdVector index 6b22627f6..fdfc37766 100644 --- a/Eigen/StdVector +++ b/Eigen/StdVector @@ -14,7 +14,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) 
diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport index 575e14fbc..d1eac9464 100644 --- a/Eigen/SuperLUSupport +++ b/Eigen/SuperLUSupport @@ -48,12 +48,8 @@ namespace Eigen { struct SluMatrix; } * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/SuperLUSupport/SuperLUSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/Eigen/UmfPackSupport b/Eigen/UmfPackSupport index 984f64a84..0efad5dee 100644 --- a/Eigen/UmfPackSupport +++ b/Eigen/UmfPackSupport @@ -26,9 +26,6 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/UmfPackSupport/UmfPackSupport.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index aa9784e54..5acbf4651 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -85,7 +85,7 @@ template class LDLT * according to the specified problem \a size. * \sa LDLT() */ - LDLT(Index size) + explicit LDLT(Index size) : m_matrix(size, size), m_transpositions(size), m_temporary(size), @@ -98,7 +98,7 @@ template class LDLT * This calculates the decomposition for the input \a matrix. 
* \sa LDLT(Index size) */ - LDLT(const MatrixType& matrix) + explicit LDLT(const MatrixType& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_transpositions(matrix.rows()), m_temporary(matrix.rows()), @@ -175,13 +175,13 @@ template class LDLT * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() */ template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LDLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); } template @@ -217,6 +217,12 @@ template class LDLT eigen_assert(m_isInitialized && "LDLT is not initialized."); return Success; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: @@ -400,16 +406,16 @@ template struct LDLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } }; template struct LDLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } }; } // end namespace internal @@ -427,6 +433,7 @@ LDLT& LDLT::compute(const MatrixType& a) m_transpositions.resize(size); m_isInitialized = false; m_temporary.resize(size); + m_sign = internal::ZeroSign; 
internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign); @@ -466,52 +473,45 @@ LDLT& LDLT::rankUpdate(const MatrixBase -struct solve_retval, Rhs> - : solve_retval_base, Rhs> +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LDLT<_MatrixType,_UpLo> LDLTType; - EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs) + eigen_assert(rhs.rows() == rows()); + // dst = P b + dst = m_transpositions * rhs; - template void evalTo(Dest& dst) const + // dst = L^-1 (P b) + matrixL().solveInPlace(dst); + + // dst = D^-1 (L^-1 P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + const typename Diagonal::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = numext::maxi(vectorD.array().abs().maxCoeff() *NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance with respect to the highest + // diagonal element is not well justified and leads to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. 
+ RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + + for (Index i = 0; i < vecD.size(); ++i) { - eigen_assert(rhs().rows() == dec().matrixLDLT().rows()); - // dst = P b - dst = dec().transpositionsP() * rhs(); - - // dst = L^-1 (P b) - dec().matrixL().solveInPlace(dst); - - // dst = D^-1 (L^-1 P b) - // more precisely, use pseudo-inverse of D (see bug 241) - using std::abs; - EIGEN_USING_STD_MATH(max); - typedef typename LDLTType::MatrixType MatrixType; - typedef typename LDLTType::RealScalar RealScalar; - const typename Diagonal::RealReturnType vectorD(dec().vectorD()); - // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon - // as motivated by LAPACK's xGELSS: - // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); - // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest - // diagonal element is not well justified and to numerical issues in some cases. - // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. 
- RealScalar tolerance = RealScalar(1) / NumTraits::highest(); - for (Index i = 0; i < vectorD.size(); ++i) { - if(abs(vectorD(i)) > tolerance) - dst.row(i) /= vectorD(i); - else - dst.row(i).setZero(); - } - - // dst = L^-T (D^-1 L^-1 P b) - dec().matrixU().solveInPlace(dst); - - // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b - dst = dec().transpositionsP().transpose() * dst; + if(abs(vecD(i)) > tolerance) + dst.row(i) /= vecD(i); + else + dst.row(i).setZero(); } -}; + + // dst = L^-T (D^-1 L^-1 P b) + matrixU().solveInPlace(dst); + + // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b + dst = m_transpositions.transpose() * dst; } +#endif /** \internal use x = ldlt_object.solve(x); * diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 38e820165..90194e64d 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -83,10 +83,10 @@ template class LLT * according to the specified problem \a size. * \sa LLT() */ - LLT(Index size) : m_matrix(size, size), + explicit LLT(Index size) : m_matrix(size, size), m_isInitialized(false) {} - LLT(const MatrixType& matrix) + explicit LLT(const MatrixType& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_isInitialized(false) { @@ -118,13 +118,13 @@ template class LLT * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() */ template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); } template @@ -161,6 +161,12 @@ template class LLT template LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: /** \internal @@ -345,8 +351,8 @@ template 
struct LLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace::blocked(m)==-1; } }; @@ -355,8 +361,8 @@ template struct LLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace::blocked(m)==-1; } }; @@ -404,22 +410,16 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c return *this; } - -namespace internal { -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LLT<_MatrixType,UpLo> LLTType; - EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs) - - template void evalTo(Dest& dst) const - { - dst = rhs(); - dec().solveInPlace(dst); - } -}; + dst = rhs; + solveInPlace(dst); } +#endif /** \internal use x = llt_object.solve(x); * diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index c449960de..3eadb83a0 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -105,7 +105,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& /** Returns a view of the Eigen sparse matrix \a mat as Cholmod 
sparse matrix. * The data are not copied but shared. */ template -cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) { cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived()); @@ -157,8 +157,12 @@ enum CholmodMode { * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT */ template -class CholmodBase : internal::noncopyable +class CholmodBase : public SparseSolverBase { + protected: + typedef SparseSolverBase Base; + using Base::derived; + using Base::m_isInitialized; public: typedef _MatrixType MatrixType; enum { UpLo = _UpLo }; @@ -170,14 +174,14 @@ class CholmodBase : internal::noncopyable public: CholmodBase() - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + : m_cholmodFactor(0), m_info(Success) { m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); } - CholmodBase(const MatrixType& matrix) - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + explicit CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success) { m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); @@ -194,9 +198,6 @@ class CholmodBase : internal::noncopyable inline Index cols() const { return m_cholmodFactor->n; } inline Index rows() const { return m_cholmodFactor->n; } - Derived& derived() { return *static_cast(this); } - const Derived& derived() const { return *static_cast(this); } - /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was succesful, @@ -216,34 +217,6 @@ class CholmodBase : internal::noncopyable return derived(); } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template - inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. * * This function is particularly useful when solving for several problems having the same structure. 
@@ -290,7 +263,7 @@ class CholmodBase : internal::noncopyable #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template - void _solve(const MatrixBase &b, MatrixBase &dest) const + void _solve_impl(const MatrixBase &b, MatrixBase &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -312,7 +285,7 @@ class CholmodBase : internal::noncopyable /** \internal */ template - void _solve(const SparseMatrix &b, SparseMatrix &dest) const + void _solve_impl(const SparseMatrix &b, SparseMatrix &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -357,7 +330,6 @@ class CholmodBase : internal::noncopyable cholmod_factor* m_cholmodFactor; RealScalar m_shiftOffset[2]; mutable ComputationInfo m_info; - bool m_isInitialized; int m_factorizationIsOk; int m_analysisIsOk; }; @@ -572,36 +544,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom } }; -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_CHOLMODSUPPORT_H diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index 28d6f1443..337086615 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -74,6 +74,21 @@ class Array { return 
Base::operator=(other); } + + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() + */ + /* This overload is needed because the usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Scalar &value) + { + Base::setConstant(value); + return *this; + } /** Copies the value of the expression \a other into \c *this with automatic resizing. * @@ -99,7 +114,7 @@ class Array { return Base::_set(other); } - + /** Default constructor. * * For fixed-size matrices, does nothing. @@ -144,7 +159,6 @@ class Array } #endif - #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC @@ -244,13 +258,6 @@ class Array *this = other; } - /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. - */ - template - void swap(ArrayBase const & other) - { this->_swap(other.derived()); } - EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 2c9ace4a7..d42693d4b 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -64,8 +64,7 @@ template class ArrayBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -121,8 +120,15 @@ template class ArrayBase EIGEN_DEVICE_FUNC Derived& operator=(const ArrayBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } + + /** Set all the entries to \a value. 
+ * \sa DenseBase::setConstant(), DenseBase::fill() */ + EIGEN_DEVICE_FUNC + Derived& operator=(const Scalar &value) + { Base::setConstant(value); return derived(); } EIGEN_DEVICE_FUNC Derived& operator+=(const Scalar& scalar); @@ -153,9 +159,9 @@ template class ArrayBase /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array * \sa MatrixBase::array() */ EIGEN_DEVICE_FUNC - MatrixWrapper matrix() { return derived(); } + MatrixWrapper matrix() { return MatrixWrapper(derived()); } EIGEN_DEVICE_FUNC - const MatrixWrapper matrix() const { return derived(); } + const MatrixWrapper matrix() const { return MatrixWrapper(derived()); } // template // inline void evalTo(Dest& dst) const { dst = matrix(); } @@ -186,8 +192,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -200,8 +205,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator+=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -214,8 +218,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator*=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::mul_assign_op()); return derived(); } @@ -228,8 +231,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator/=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::div_assign_op()); return derived(); } diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h index 28d7b7bd5..0b89c58cb 100644 --- 
a/Eigen/src/Core/ArrayWrapper.h +++ b/Eigen/src/Core/ArrayWrapper.h @@ -44,6 +44,7 @@ class ArrayWrapper : public ArrayBase > typedef ArrayBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + typedef typename internal::remove_all::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue::value, @@ -54,7 +55,7 @@ class ArrayWrapper : public ArrayBase > typedef typename internal::nested::type NestedExpressionType; EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } @@ -186,6 +187,7 @@ class MatrixWrapper : public MatrixBase > typedef MatrixBase > Base; EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + typedef typename internal::remove_all::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue::value, @@ -196,7 +198,7 @@ class MatrixWrapper : public MatrixBase > typedef typename internal::nested::type NestedExpressionType; EIGEN_DEVICE_FUNC - inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {} + explicit inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {} EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 07da2fe31..53806ba33 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -14,485 +14,6 @@ namespace Eigen { -namespace internal { - -/*************************************************************************** -* Part 1 : the logic deciding a strategy for traversal and unrolling * -***************************************************************************/ - -template -struct assign_traits -{ -public: - enum { - DstIsAligned = Derived::Flags & 
AlignedBit, - DstHasDirectAccess = Derived::Flags & DirectAccessBit, - SrcIsAligned = OtherDerived::Flags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned - }; - -private: - enum { - InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) - : int(Derived::RowsAtCompileTime), - InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) - : int(Derived::MaxRowsAtCompileTime), - MaxSizeAtCompileTime = Derived::SizeAtCompileTime, - PacketSize = packet_traits::size - }; - - enum { - StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), - MightVectorize = StorageOrdersAgree - && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), - MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 - && int(DstIsAligned) && int(SrcIsAligned), - MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), - MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess - && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), - /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, - so it's only good for large enough sizes. */ - MaySliceVectorize = MightVectorize && DstHasDirectAccess - && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) - /* slice vectorization can be slow, so we only want it if the slices are big, which is - indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix */ - }; - -public: - enum { - Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) - : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) - : int(MaySliceVectorize) ? 
int(SliceVectorizedTraversal) - : int(MayLinearize) ? int(LinearTraversal) - : int(DefaultTraversal), - Vectorized = int(Traversal) == InnerVectorizedTraversal - || int(Traversal) == LinearVectorizedTraversal - || int(Traversal) == SliceVectorizedTraversal - }; - -private: - enum { - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), - MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), - MayUnrollInner = int(InnerSize) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) - }; - -public: - enum { - Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) - ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) - : int(MayUnrollInner) ? int(InnerUnrolling) - : int(NoUnrolling) - ) - : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(Traversal) == int(LinearTraversal) - ? ( bool(MayUnrollCompletely) ? 
int(CompleteUnrolling) : int(NoUnrolling) ) - : int(NoUnrolling) - }; - -#ifdef EIGEN_DEBUG_ASSIGN - static void debug() - { - EIGEN_DEBUG_VAR(DstIsAligned) - EIGEN_DEBUG_VAR(SrcIsAligned) - EIGEN_DEBUG_VAR(JointAlignment) - EIGEN_DEBUG_VAR(Derived::SizeAtCompileTime) - EIGEN_DEBUG_VAR(OtherDerived::CoeffReadCost) - EIGEN_DEBUG_VAR(InnerSize) - EIGEN_DEBUG_VAR(InnerMaxSize) - EIGEN_DEBUG_VAR(PacketSize) - EIGEN_DEBUG_VAR(StorageOrdersAgree) - EIGEN_DEBUG_VAR(MightVectorize) - EIGEN_DEBUG_VAR(MayLinearize) - EIGEN_DEBUG_VAR(MayInnerVectorize) - EIGEN_DEBUG_VAR(MayLinearVectorize) - EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) - EIGEN_DEBUG_VAR(UnrollingLimit) - EIGEN_DEBUG_VAR(MayUnrollCompletely) - EIGEN_DEBUG_VAR(MayUnrollInner) - EIGEN_DEBUG_VAR(Unrolling) - } -#endif -}; - -/*************************************************************************** -* Part 2 : meta-unrollers -***************************************************************************/ - -/************************ -*** Default traversal *** -************************/ - -template -struct assign_DefaultTraversal_CompleteUnrolling -{ - enum { - outer = Index / Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime - }; - - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeffByOuterInner(outer, inner, src); - assign_DefaultTraversal_CompleteUnrolling::run(dst, src); - } -}; - -template -struct assign_DefaultTraversal_CompleteUnrolling -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -template -struct assign_DefaultTraversal_InnerUnrolling -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.copyCoeffByOuterInner(outer, Index, src); - assign_DefaultTraversal_InnerUnrolling::run(dst, src, outer); - } -}; - -template -struct 
assign_DefaultTraversal_InnerUnrolling -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template -struct assign_LinearTraversal_CompleteUnrolling -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeff(Index, src); - assign_LinearTraversal_CompleteUnrolling::run(dst, src); - } -}; - -template -struct assign_LinearTraversal_CompleteUnrolling -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template -struct assign_innervec_CompleteUnrolling -{ - enum { - outer = Index / Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime, - JointAlignment = assign_traits::JointAlignment - }; - - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.template copyPacketByOuterInner(outer, inner, src); - assign_innervec_CompleteUnrolling::size, Stop>::run(dst, src); - } -}; - -template -struct assign_innervec_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -template -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.template copyPacketByOuterInner(outer, Index, src); - assign_innervec_InnerUnrolling::size, Stop>::run(dst, src, outer); - } -}; - -template -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template::Traversal, - int 
Unrolling = assign_traits::Unrolling, - int Version = Specialized> -struct assign_impl; - -/************************ -*** Default traversal *** -************************/ - -template -struct assign_impl -{ - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &, const Derived2 &) { } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; ++inner) - dst.copyCoeffByOuterInner(outer, inner, src); - } -}; - -template -struct assign_impl -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_DefaultTraversal_CompleteUnrolling - ::run(dst, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_DefaultTraversal_InnerUnrolling - ::run(dst, src, outer); - } -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - for(Index i = 0; i < size; ++i) - dst.copyCoeff(i, src); - } -}; - -template -struct assign_impl -{ - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_LinearTraversal_CompleteUnrolling - ::run(dst, src); - } -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const 
Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index packetSize = packet_traits::size; - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; inner+=packetSize) - dst.template copyPacketByOuterInner(outer, inner, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_innervec_CompleteUnrolling - ::run(dst, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_innervec_InnerUnrolling - ::run(dst, src, outer); - } -}; - -/*************************** -*** Linear vectorization *** -***************************/ - -template -struct unaligned_assign_impl -{ - template - static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {} -}; - -template <> -struct unaligned_assign_impl -{ - // MSVC must not inline this functions. If it does, it fails to optimize the - // packet access path. 
-#ifdef _MSC_VER - template - static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#else - template - static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#endif - { - for (typename Derived::Index index = start; index < end; ++index) - dst.copyCoeff(index, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - typedef packet_traits PacketTraits; - enum { - packetSize = PacketTraits::size, - dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits::DstIsAligned) , - srcAlignment = assign_traits::JointAlignment - }; - const Index alignedStart = assign_traits::DstIsAligned ? 0 - : internal::first_aligned(&dst.coeffRef(0), size); - const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - - unaligned_assign_impl::DstIsAligned!=0>::run(src,dst,0,alignedStart); - - for(Index index = alignedStart; index < alignedEnd; index += packetSize) - { - dst.template copyPacket(index, src); - } - - unaligned_assign_impl<>::run(src,dst,alignedEnd,size); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - enum { size = Derived1::SizeAtCompileTime, - packetSize = packet_traits::size, - alignedSize = (size/packetSize)*packetSize }; - - assign_innervec_CompleteUnrolling::run(dst, src); - assign_DefaultTraversal_CompleteUnrolling::run(dst, src); - } -}; - -/************************** -*** Slice vectorization *** -***************************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - typedef packet_traits PacketTraits; - enum 
{ - packetSize = PacketTraits::size, - alignable = PacketTraits::AlignedOnScalar, - dstAlignment = alignable ? Aligned : int(assign_traits::DstIsAligned) , - srcAlignment = assign_traits::JointAlignment - }; - const Index packetAlignedMask = packetSize - 1; - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; - Index alignedStart = ((!alignable) || assign_traits::DstIsAligned) ? 0 - : internal::first_aligned(&dst.coeffRef(0,0), innerSize); - - for(Index outer = 0; outer < outerSize; ++outer) - { - const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); - // do the non-vectorizable part of the assignment - for(Index inner = 0; inner(outer, inner, src); - - // do the non-vectorizable part of the assignment - for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); - } - } -}; - -} // end namespace internal - -/*************************************************************************** -* Part 4 : implementation of DenseBase methods -***************************************************************************/ - template template EIGEN_STRONG_INLINE Derived& DenseBase @@ -506,91 +27,35 @@ EIGEN_STRONG_INLINE Derived& DenseBase EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) -#ifdef EIGEN_TEST_EVALUATORS - -#ifdef EIGEN_DEBUG_ASSIGN - internal::copy_using_evaluator_traits::debug(); -#endif eigen_assert(rows() == other.rows() && cols() == other.cols()); - internal::call_dense_assignment_loop(derived(),other.derived()); + internal::call_assignment_no_alias(derived(),other.derived()); -#else // EIGEN_TEST_EVALUATORS - -#ifdef EIGEN_DEBUG_ASSIGN - internal::assign_traits::debug(); -#endif - eigen_assert(rows() == 
other.rows() && cols() == other.cols()); - internal::assign_impl::Traversal) - : int(InvalidTraversal)>::run(derived(),other.derived()); - -#endif // EIGEN_TEST_EVALUATORS - -#ifndef EIGEN_NO_DEBUG - checkTransposeAliasing(other.derived()); -#endif return derived(); } -namespace internal { - -template::Flags) & EvalBeforeAssigningBit) != 0, - bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1) - | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". - // revert to || as soon as not needed anymore. - (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1)) - && int(Derived::SizeAtCompileTime) != 1> -struct assign_selector; - -template -struct assign_selector { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } - template - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; } -}; -template -struct assign_selector { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } -}; -template -struct assign_selector { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } - template - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose dstTrans(dst); other.evalTo(dstTrans); return dst; } -}; -template -struct assign_selector { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } -}; - -} // end namespace internal - template template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const 
DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template @@ -598,7 +63,8 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template @@ -606,7 +72,8 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) { - return internal::assign_selector::evalTo(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template @@ -614,7 +81,8 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const ReturnByValue& other) { - return internal::assign_selector::evalTo(derived(), other.derived()); + other.derived().evalTo(derived()); + return derived(); } } // end namespace Eigen diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 5451a138f..4db10e697 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -2,7 +2,7 @@ // for linear algebra. 
// // Copyright (C) 2011 Benoit Jacob -// Copyright (C) 2011-2013 Gael Guennebaud +// Copyright (C) 2011-2014 Gael Guennebaud // Copyright (C) 2011-2012 Jitse Niesen // // This Source Code Form is subject to the terms of the Mozilla @@ -24,37 +24,46 @@ namespace internal { // copy_using_evaluator_traits is based on assign_traits -template +template struct copy_using_evaluator_traits { + typedef typename DstEvaluator::XprType Dst; + + enum { + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags + }; + public: enum { - DstIsAligned = Derived::Flags & AlignedBit, - DstHasDirectAccess = Derived::Flags & DirectAccessBit, - SrcIsAligned = OtherDerived::Flags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned, - SrcEvalBeforeAssign = (evaluator_traits::HasEvalTo == 1) + DstIsAligned = DstFlags & AlignedBit, + DstHasDirectAccess = DstFlags & DirectAccessBit, + SrcIsAligned = SrcFlags & AlignedBit, + JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned }; private: enum { - InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) - : int(Derived::RowsAtCompileTime), - InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) - : int(Derived::MaxRowsAtCompileTime), - MaxSizeAtCompileTime = Derived::SizeAtCompileTime, - PacketSize = packet_traits::size + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + PacketSize = packet_traits::size }; enum { - StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), + DstIsRowMajor = DstFlags&RowMajorBit, + SrcIsRowMajor = SrcFlags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), MightVectorize = StorageOrdersAgree - && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), + && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) + && (functor_traits::PacketAccess), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 && int(DstIsAligned) && int(SrcIsAligned), - MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), + MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, @@ -68,8 +77,7 @@ private: public: enum { - Traversal = int(SrcEvalBeforeAssign) ? int(AllAtOnceTraversal) - : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) : int(MayLinearize) ? int(LinearTraversal) @@ -82,12 +90,12 @@ public: private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), - MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), + MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic + && int(SrcEvaluator::CoeffReadCost) != Dynamic + && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) + && int(SrcEvaluator::CoeffReadCost) != Dynamic + && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) }; public: @@ -110,6 +118,12 @@ public: #ifdef EIGEN_DEBUG_ASSIGN static void debug() { + std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; + std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(DstFlags) + EIGEN_DEBUG_VAR(SrcFlags) + std::cerr.unsetf(std::ios::hex); EIGEN_DEBUG_VAR(DstIsAligned) EIGEN_DEBUG_VAR(SrcIsAligned) EIGEN_DEBUG_VAR(JointAlignment) @@ -127,6 +141,7 @@ public: EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; } #endif }; @@ -142,6 +157,7 @@ public: template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 
typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; @@ -150,7 +166,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling inner = Index % DstXprType::InnerSizeAtCompileTime }; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { kernel.assignCoeffByOuterInner(outer, inner); copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); @@ -160,13 +176,13 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer) { kernel.assignCoeffByOuterInner(outer, Index); copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); @@ -176,7 +192,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel&, int) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index) { } }; /*********************** @@ -186,7 +202,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel& kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { kernel.assignCoeff(Index); copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); @@ -196,7 +212,7 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - 
static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; /************************** @@ -206,16 +222,17 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling struct copy_using_evaluator_innervec_CompleteUnrolling { + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; - + enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime, JointAlignment = Kernel::AssignmentTraits::JointAlignment }; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { kernel.template assignPacketByOuterInner(outer, inner); enum { NextIndex = Index + packet_traits::size }; @@ -226,17 +243,16 @@ struct copy_using_evaluator_innervec_CompleteUnrolling template struct copy_using_evaluator_innervec_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel&) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } }; template struct copy_using_evaluator_innervec_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index outer) { kernel.template assignPacketByOuterInner(outer, Index); - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - enum { NextIndex = Index + packet_traits::size }; + enum { NextIndex = Index + packet_traits::size }; copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); } }; @@ -244,7 +260,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling template struct copy_using_evaluator_innervec_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(Kernel &, int) { } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, typename Kernel::Index) { } }; 
/*************************************************************************** @@ -265,7 +281,7 @@ struct dense_assignment_loop; template struct dense_assignment_loop { - static void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -280,7 +296,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); @@ -291,7 +307,7 @@ template struct dense_assignment_loop { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ -314,7 +330,7 @@ struct unaligned_dense_assignment_loop { // if IsAligned = true, then do nothing template - static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {} }; template <> @@ -323,14 +339,14 @@ struct unaligned_dense_assignment_loop // MSVC must not inline this functions. If it does, it fails to optimize the // packet access path. 
// FIXME check which version exhibits this issue -#ifdef _MSC_VER +#if EIGEN_COMP_MSVC template static EIGEN_DONT_INLINE void run(Kernel &kernel, typename Kernel::Index start, typename Kernel::Index end) #else template - static EIGEN_STRONG_INLINE void run(Kernel &kernel, + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, typename Kernel::Index start, typename Kernel::Index end) #endif @@ -343,7 +359,7 @@ struct unaligned_dense_assignment_loop template struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -371,7 +387,7 @@ template struct dense_assignment_loop { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ -391,7 +407,7 @@ struct dense_assignment_loop struct dense_assignment_loop { - static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; @@ -407,7 +423,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); @@ -418,7 +434,7 @@ template struct dense_assignment_loop { typedef typename Kernel::Index Index; - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; const Index outerSize = kernel.outerSize(); @@ -434,7 +450,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - static inline void run(Kernel &kernel) + 
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; const Index size = kernel.size(); @@ -446,7 +462,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); @@ -460,7 +476,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { typedef typename Kernel::Index Index; typedef packet_traits PacketTraits; @@ -496,25 +512,8 @@ struct dense_assignment_loop } }; -/**************************** -*** All-at-once traversal *** -****************************/ - -// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael) -// Indeed, what to do with the kernel's functor?? -template -struct dense_assignment_loop -{ - static inline void run(Kernel & kernel) - { - // Evaluate rhs in temporary to prevent aliasing problems in a = a * a; - // TODO: Do not pass the xpr object to evalTo() (Jitse) - kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression()); - } -}; - /*************************************************************************** -* Part 4 : Generic Assignment routine +* Part 4 : Generic dense assignment kernel ***************************************************************************/ // This class generalize the assignment of a coefficient (or packet) from one dense evaluator @@ -523,7 +522,7 @@ struct dense_assignment_loop // This abstraction level permits to keep the evaluation loops as simple and as generic as possible. // One can customize the assignment using this generic dense_assignment_kernel with different // functors, or by completely overloading it, by-passing a functor. 
-template +template class generic_dense_assignment_kernel { protected: @@ -535,35 +534,44 @@ public: typedef SrcEvaluatorTypeT SrcEvaluatorType; typedef typename DstEvaluatorType::Scalar Scalar; typedef typename DstEvaluatorType::Index Index; - typedef copy_using_evaluator_traits AssignmentTraits; + typedef copy_using_evaluator_traits AssignmentTraits; - generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) - {} + { + #ifdef EIGEN_DEBUG_ASSIGN + AssignmentTraits::debug(); + #endif + } - Index size() const { return m_dstExpr.size(); } - Index innerSize() const { return m_dstExpr.innerSize(); } - Index outerSize() const { return m_dstExpr.outerSize(); } - Index outerStride() const { return m_dstExpr.outerStride(); } + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } // TODO get rid of this one: - DstXprType& dstExpression() const { return m_dstExpr; } + EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; } - DstEvaluatorType& dstEvaluator() { return m_dst; } - const SrcEvaluatorType& srcEvaluator() const { return m_src; } + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } - void assignCoeff(Index row, Index col) + /// Assign src(row,col) to dst(row,col) through the assignment functor. 
+ EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); } - void assignCoeff(Index index) + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC void assignCoeff(Index index) { m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); } - void assignCoeffByOuterInner(Index outer, Index inner) + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); @@ -572,40 +580,40 @@ public: template - void assignPacket(Index row, Index col) + EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col) { m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); } template - void assignPacket(Index index) + EIGEN_DEVICE_FUNC void assignPacket(Index index) { m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); } template - void assignPacketByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); assignPacket(row, col); } - static Index rowIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 : int(Traits::ColsAtCompileTime) == 1 ? inner - : int(Traits::Flags)&RowMajorBit ? outer + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer : inner; } - static Index colIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 : int(Traits::RowsAtCompileTime) == 1 ? 
inner - : int(Traits::Flags)&RowMajorBit ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? inner : outer; } @@ -617,13 +625,13 @@ protected: DstXprType& m_dstExpr; }; +/*************************************************************************** +* Part 5 : Entry point for dense rectangular assignment +***************************************************************************/ + template -void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) { -#ifdef EIGEN_DEBUG_ASSIGN - // TODO these traits should be computed from information provided by the evaluators - internal::copy_using_evaluator_traits::debug(); -#endif eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); typedef typename evaluator::type DstEvaluatorType; @@ -639,201 +647,147 @@ void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, co } template -void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) { call_dense_assignment_loop(dst, src, internal::assign_op()); } /*************************************************************************** -* Part 5 : Entry points +* Part 6 : Generic assignment ***************************************************************************/ -// Based on DenseBase::LazyAssign() -// The following functions are just for testing and they are meant to be moved to operator= and the likes. +// Based on the respective shapes of the destination and source, +// the class AssignmentKind determine the kind of assignment mechanism. +// AssignmentKind must define a Kind typedef. 
+template struct AssignmentKind; -template class StorageBase, typename SrcXprType> -EIGEN_STRONG_INLINE -const DstXprType& copy_using_evaluator(const NoAlias& dst, - const EigenBase& src) -{ - return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op()); -} +// Assignment kinds defined in this file: +struct Dense2Dense {}; +struct EigenBase2EigenBase {}; -template::AssumeAliasing> -struct AddEvalIfAssumingAliasing; - -template -struct AddEvalIfAssumingAliasing -{ - static const XprType& run(const XprType& xpr) - { - return xpr; - } -}; - -template -struct AddEvalIfAssumingAliasing -{ - static const EvalToTemp run(const XprType& xpr) - { - return EvalToTemp(xpr); - } -}; - -template -EIGEN_STRONG_INLINE -const DstXprType& copy_using_evaluator(const EigenBase& dst, const EigenBase& src, const Functor &func) -{ - return noalias_copy_using_evaluator(dst.const_cast_derived(), - AddEvalIfAssumingAliasing::run(src.derived()), - func - ); -} - -// this mimics operator= -template -EIGEN_STRONG_INLINE -const DstXprType& copy_using_evaluator(const EigenBase& dst, const EigenBase& src) -{ - return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op()); -} - -template -EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase& dst, const EigenBase& src, const Functor &func) -{ -#ifdef EIGEN_DEBUG_ASSIGN - internal::copy_using_evaluator_traits::debug(); -#endif -#ifdef EIGEN_NO_AUTOMATIC_RESIZING - eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size()) - : (dst.rows() == src.rows() && dst.cols() == src.cols()))) - && "Size mismatch.
Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); -#else - dst.const_cast_derived().resizeLike(src.derived()); -#endif - call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); - return dst.derived(); -} - -template -EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const EigenBase& dst, const EigenBase& src, const Functor &func) -{ - call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); - return dst.derived(); -} - -// Based on DenseBase::swap() -// TODO: Check whether we need to do something special for swapping two -// Arrays or Matrices. (Jitse) - -// Overload default assignPacket behavior for swapping them -template -class swap_kernel : public generic_dense_assignment_kernel > -{ - typedef generic_dense_assignment_kernel > Base; - typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar; - using Base::m_dst; - using Base::m_src; - using Base::m_functor; - -public: - typedef typename Base::Scalar Scalar; - typedef typename Base::Index Index; - typedef typename Base::DstXprType DstXprType; - - swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr) - : Base(dst, src, swap_assign_op(), dstExpr) - {} - - template - void assignPacket(Index row, Index col) - { - m_functor.template swapPacket(&m_dst.coeffRef(row,col), &const_cast(m_src).coeffRef(row,col)); - } - - template - void assignPacket(Index index) - { - m_functor.template swapPacket(&m_dst.coeffRef(index), &const_cast(m_src).coeffRef(index)); - } - - // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) - template - void assignPacketByOuterInner(Index outer, Index inner) - { - Index row = Base::rowIndexByOuterInner(outer, inner); - Index col = Base::colIndexByOuterInner(outer, inner); - assignPacket(row, col); - } -}; - -template -void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) -{ - 
// TODO there is too much redundancy with call_dense_assignment_loop - - eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); +template struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; +template<> struct AssignmentKind { typedef Dense2Dense Kind; }; - typedef swap_kernel Kernel; - Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived()); +// This is the main assignment class +template< typename DstXprType, typename SrcXprType, typename Functor, + typename Kind = typename AssignmentKind< typename evaluator_traits::Shape , typename evaluator_traits::Shape >::Kind, + typename Scalar = typename DstXprType::Scalar> +struct Assignment; + + +// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition. +// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes things quite complicated. +// So this intermediate function removes everything related to AssumeAliasing such that Assignment +// does not have to bother about these annoying details.
+ +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} +template +EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} + +// Deal with AssumeAliasing +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::AssumeAliasing==1, void*>::type = 0) +{ + typename plain_matrix_type::type tmp(src); + call_assignment_no_alias(dst, tmp, func); +} + +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::AssumeAliasing==0, void*>::type = 0) +{ + call_assignment_no_alias(dst, src, func); +} + +// by-pass AssumeAliasing +// FIXME the const version should probably not be needed +// When there is no aliasing, we require that 'dst' has been properly resized +template class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment(const NoAlias& dst, const Src& src, const Func& func) +{ + call_assignment_no_alias(dst.expression(), src, func); +} +template class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment(NoAlias& dst, const Src& src, const Func& func) +{ + call_assignment_no_alias(dst.expression(), src, func); +} + + +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +{ + enum { + NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) + | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". + // revert to || as soon as not needed anymore. + (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) + && int(Dst::SizeAtCompileTime) != 1 + }; + + typename Dst::Index dstRows = NeedToTranspose ? src.cols() : src.rows(); + typename Dst::Index dstCols = NeedToTranspose ? 
src.rows() : src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); - dense_assignment_loop::run(kernel); -} + typedef typename internal::conditional, Dst>::type ActualDstTypeCleaned; + typedef typename internal::conditional, Dst&>::type ActualDstType; + ActualDstType actualDst(dst); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) -// Based on MatrixBase::operator+= (in CwiseBinaryOp.h) -template -void add_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) + // TODO this line is commented out to allow matrix = permutation + // Actually, the "Scalar" type for a permutation matrix does not really make sense, + // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow mixing void with anything...? +// EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); + + Assignment::run(actualDst, src, func); +} +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src) { - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), add_assign_op()); + call_assignment_no_alias(dst, src, internal::assign_op()); } -// Based on ArrayBase::operator+= -template -void add_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) +// forward declaration +template void check_for_aliasing(const Dst &dst, const Src &src); + +// Generic Dense to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment { - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), add_assign_op()); -} + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + +#ifndef EIGEN_NO_DEBUG
+ internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func); + } +}; -// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse) - -template -void subtract_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) +// Generic assignment through evalTo. +// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment { - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op()); -} - -template -void subtract_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) -{ - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op()); -} - -template -void multiply_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) -{ - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op()); -} - -template -void divide_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) -{ - typedef typename DstXprType::Scalar Scalar; - copy_using_evaluator(dst.derived(), src.derived(), div_assign_op()); -} - + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + src.evalTo(dst); + } +}; } // namespace internal diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h index ffd7fe8b3..e59ee3da9 100644 --- a/Eigen/src/Core/BandMatrix.h +++ b/Eigen/src/Core/BandMatrix.h @@ -204,7 +204,7 @@ class BandMatrix : public BandMatrixBase::Index Index; typedef typename internal::traits::CoefficientsType CoefficientsType; - inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) + explicit inline BandMatrix(Index rows=Rows, Index 
cols=Cols, Index supers=Supers, Index subs=Subs) : m_coeffs(1+supers+subs,cols), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -266,7 +266,7 @@ class BandMatrixWrapper : public BandMatrixBase::CoefficientsType CoefficientsType; typedef typename internal::traits::Index Index; - inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) : m_coeffs(coeffs), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -314,7 +314,7 @@ class TridiagonalMatrix : public BandMatrix Base; typedef typename Base::Index Index; public: - TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} + explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} inline typename Base::template DiagonalIntReturnType<1>::Type super() { return Base::template diagonal<1>(); } @@ -327,6 +327,25 @@ class TridiagonalMatrix : public BandMatrix +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index da193d1a2..9cf9d5432 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -68,6 +68,7 @@ struct traits > : traits::MaxColsAtCompileTime), + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 
0 @@ -80,18 +81,14 @@ struct traits > : traits::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) - && (InnerStrideAtCompileTime == 1) - ? PacketAccessBit : 0, - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0, - FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + // IsAligned is needed by MapBase's assertions + // We can safely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + IsAligned = 0, + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - Flags0 = traits::Flags & ( (HereditaryBits & ~RowMajorBit) | - DirectAccessBit | - MaskPacketAccessBit | - MaskAlignedBit), - Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit + Flags = (traits::Flags & DirectAccessBit) | FlagsLvalueBit | FlagsRowMajorBit + // FIXME DirectAccessBit should not be handled by expressions }; }; @@ -111,6 +108,8 @@ template class typedef Impl Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Block) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + typedef typename internal::remove_all::type NestedExpression; /** Column or Row constructor */ @@ -179,7 +178,7 @@ template : public MapBase > { typedef Block BlockType; + enum { + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0 + }; public: typedef MapBase Base; @@ -343,9 +345,8 @@ class BlockImpl_dense */ EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index i) - : Base(internal::const_cast_ptr(&xpr.coeffRef( - (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ?
i : 0, - (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)), + : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) + || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), BlockRows==1 ? 1 : xpr.rows(), BlockCols==1 ? 1 : xpr.cols()), m_xpr(xpr) @@ -357,7 +358,8 @@ class BlockImpl_dense */ EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), + m_xpr(xpr) { init(); } @@ -368,7 +370,7 @@ class BlockImpl_dense inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols), + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), m_xpr(xpr) { init(); diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index be9f48a8c..dac1887e0 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -17,9 +17,10 @@ namespace internal { template struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) @@ -43,11 +44,12 @@ struct all_unroller template struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col 
= (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; - + static inline bool run(const Derived &mat) { return any_unroller::run(mat) || mat.coeff(row, col); @@ -78,19 +80,21 @@ struct any_unroller template inline bool DenseBase::all() const { + typedef typename internal::evaluator::type Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic + && Evaluator::CoeffReadCost != Dynamic && NumTraits::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller::run(derived()); + return internal::all_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (!coeff(i, j)) return false; + if (!evaluator.coeff(i, j)) return false; return true; } } @@ -102,19 +106,21 @@ inline bool DenseBase::all() const template inline bool DenseBase::any() const { + typedef typename internal::evaluator::type Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic + && Evaluator::CoeffReadCost != Dynamic && NumTraits::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller::run(derived()); + return internal::any_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (coeff(i, j)) return true; + if (evaluator.coeff(i, j)) return true; return false; } } diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 3568cb85f..1c7123b85 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -2,7 
+2,7 @@ // for linear algebra. // // Copyright (C) 2011 Benoit Jacob -// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011-2014 Gael Guennebaud // Copyright (C) 2011-2012 Jitse Niesen // // This Source Code Form is subject to the terms of the Mozilla @@ -14,56 +14,84 @@ #define EIGEN_COREEVALUATORS_H namespace Eigen { - + namespace internal { -// evaluator_traits contains traits for evaluator_impl +// This class returns the evaluator kind from the expression storage kind. +// Default assumes index based accessors +template +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; + +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. +template struct storage_kind_to_shape; + + +template<> struct storage_kind_to_shape { typedef DenseShape Shape; }; + + +// FIXME Is this necessary? And why was it not before refactoring??? +template<> struct storage_kind_to_shape { typedef PermutationShape Shape; }; + + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. 
+// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template< typename T, + typename LhsKind = typename evaluator_traits::Kind, + typename RhsKind = typename evaluator_traits::Kind, + typename LhsScalar = typename traits::Scalar, + typename RhsScalar = typename traits::Scalar> struct binary_evaluator; + +template< typename T, + typename Kind = typename evaluator_traits::Kind, + typename Scalar = typename T::Scalar> struct unary_evaluator; + +// evaluator_traits contains traits for evaluator template -struct evaluator_traits +struct evaluator_traits_base { - // 1 if evaluator_impl::evalTo() exists - // 0 if evaluator_impl allows coefficient-based access - static const int HasEvalTo = 0; - + // TODO check whether these two indirections are really needed. + // Basically, if nobody overwrite type and nestedType, then, they can be dropped +// typedef evaluator type; +// typedef evaluator nestedType; + + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; + // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a // temporary; 0 if not. 
static const int AssumeAliasing = 0; }; -// expression class for evaluating nested expression to a temporary - -template -class EvalToTemp; - -// evaluator::type is type of evaluator for T -// evaluator::nestedType is type of evaluator if T is nested inside another evaluator - +// Default evaluator traits template -struct evaluator_impl -{ }; - -template::HasEvalTo> -struct evaluator_nested_type; - -template -struct evaluator_nested_type +struct evaluator_traits : public evaluator_traits_base { - typedef evaluator_impl type; }; + +// By default, we assume a unary expression: template -struct evaluator_nested_type +struct evaluator : public unary_evaluator { - typedef evaluator_impl > type; + typedef unary_evaluator Base; + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} }; -template -struct evaluator -{ - typedef evaluator_impl type; - typedef typename evaluator_nested_type::type nestedType; -}; // TODO: Think about const-correctness @@ -76,47 +104,59 @@ struct evaluator // TODO this class does not seem to be necessary anymore template -struct evaluator_impl_base +struct evaluator_base { - typedef typename ExpressionType::Index Index; +// typedef typename evaluator_traits::type type; +// typedef typename evaluator_traits::nestedType nestedType; + typedef evaluator type; + typedef evaluator nestedType; + + typedef typename traits::Index Index; // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits ExpressionTraits; - - evaluator_impl& derived() - { - return *static_cast*>(this); - } }; // -------------------- Matrix and Array -------------------- // -// evaluator_impl is a common base class for the +// evaluator is a common base class for the // Matrix and Array evaluators. +// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. 
template -struct evaluator_impl > - : evaluator_impl_base +struct evaluator > + : evaluator_base { typedef PlainObjectBase PlainObjectType; - - enum { - IsRowMajor = PlainObjectType::IsRowMajor, - IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, - RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, - ColsAtCompileTime = PlainObjectType::ColsAtCompileTime - }; - - evaluator_impl(const PlainObjectType& m) - : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) - { } - typedef typename PlainObjectType::Index Index; typedef typename PlainObjectType::Scalar Scalar; typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; typedef typename PlainObjectType::PacketScalar PacketScalar; typedef typename PlainObjectType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + enum { + IsRowMajor = PlainObjectType::IsRowMajor, + IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, + RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits::ReadCost, + Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime, + Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret + }; + + EIGEN_DEVICE_FUNC evaluator() + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) + {} + + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 
0 : m.outerStride()) + { } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) return m_data[row * m_outerStride.value() + col]; @@ -124,12 +164,12 @@ struct evaluator_impl > return m_data[row + col * m_outerStride.value()]; } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_data[index]; } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) return const_cast(m_data)[row * m_outerStride.value() + col]; @@ -137,7 +177,7 @@ struct evaluator_impl > return const_cast(m_data)[row + col * m_outerStride.value()]; } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return const_cast(m_data)[index]; } @@ -184,153 +224,45 @@ protected: }; template -struct evaluator_impl > - : evaluator_impl > > +struct evaluator > + : evaluator > > { typedef Matrix XprType; + + evaluator() {} - evaluator_impl(const XprType& m) - : evaluator_impl >(m) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) { } }; template -struct evaluator_impl > - : evaluator_impl > > +struct evaluator > + : evaluator > > { typedef Array XprType; - evaluator_impl(const XprType& m) - : evaluator_impl >(m) + evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) { } }; -// -------------------- EvalToTemp -------------------- - -template -struct traits > - : public traits -{ }; - -template -class EvalToTemp - : public dense_xpr_base >::type -{ - public: - - typedef typename dense_xpr_base::type Base; - EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) - - EvalToTemp(const ArgType& arg) - : m_arg(arg) - { } - - const ArgType& arg() const - { - return m_arg; - } - - Index rows() const - { - return m_arg.rows(); - } - - Index cols() const - { - return m_arg.cols(); - } - - private: - const ArgType& m_arg; -}; - -template -struct evaluator_impl > -{ - typedef 
EvalToTemp XprType; - typedef typename ArgType::PlainObject PlainObject; - - evaluator_impl(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result) - { - // TODO we should simply do m_result(xpr.arg()); - call_dense_assignment_loop(m_result, xpr.arg()); - } - - // This constructor is used when nesting an EvalTo evaluator in another evaluator - evaluator_impl(const ArgType& arg) - : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result) - { - // TODO we should simply do m_result(xpr.arg()); - call_dense_assignment_loop(m_result, arg); - } - - typedef typename PlainObject::Index Index; - typedef typename PlainObject::Scalar Scalar; - typedef typename PlainObject::CoeffReturnType CoeffReturnType; - typedef typename PlainObject::PacketScalar PacketScalar; - typedef typename PlainObject::PacketReturnType PacketReturnType; - - // All other functions are forwarded to m_resultImpl - - CoeffReturnType coeff(Index row, Index col) const - { - return m_resultImpl.coeff(row, col); - } - - CoeffReturnType coeff(Index index) const - { - return m_resultImpl.coeff(index); - } - - Scalar& coeffRef(Index row, Index col) - { - return m_resultImpl.coeffRef(row, col); - } - - Scalar& coeffRef(Index index) - { - return m_resultImpl.coeffRef(index); - } - - template - PacketReturnType packet(Index row, Index col) const - { - return m_resultImpl.template packet(row, col); - } - - template - PacketReturnType packet(Index index) const - { - return m_resultImpl.packet(index); - } - - template - void writePacket(Index row, Index col, const PacketScalar& x) - { - m_resultImpl.template writePacket(row, col, x); - } - - template - void writePacket(Index index, const PacketScalar& x) - { - m_resultImpl.template writePacket(index, x); - } - -protected: - PlainObject m_result; - typename evaluator::nestedType m_resultImpl; -}; - // -------------------- Transpose -------------------- template -struct evaluator_impl > - : evaluator_impl_base > +struct unary_evaluator, 
IndexBased> + : evaluator_base > { typedef Transpose XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags ^ RowMajorBit + }; - evaluator_impl(const XprType& t) : m_argImpl(t.nestedExpression()) {} + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -338,22 +270,22 @@ struct evaluator_impl > typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(col, row); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(col, row); } - typename XprType::Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -387,13 +319,27 @@ protected: }; // -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. template -struct evaluator_impl > +struct evaluator > + : evaluator_base > { typedef CwiseNullaryOp XprType; + typedef typename internal::remove_all::type PlainObjectTypeCleaned; + + enum { + CoeffReadCost = internal::functor_traits::Cost, + + Flags = (evaluator::Flags + & ( HereditaryBits + | (functor_has_linear_access::ret ? LinearAccessBit : 0) + | (functor_traits::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits::IsRepeatable ? 
0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore + }; - evaluator_impl(const XprType& n) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()) { } @@ -401,12 +347,12 @@ struct evaluator_impl > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(index); } @@ -430,11 +376,20 @@ protected: // -------------------- CwiseUnaryOp -------------------- template -struct evaluator_impl > +struct unary_evaluator, IndexBased > + : evaluator_base > { typedef CwiseUnaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = evaluator::Flags & ( + HereditaryBits | LinearAccessBit | AlignedBit + | (functor_traits::PacketAccess ? 
PacketAccessBit : 0)) + }; - evaluator_impl(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) { } @@ -443,12 +398,12 @@ struct evaluator_impl > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_argImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_argImpl.coeff(index)); } @@ -472,12 +427,43 @@ protected: // -------------------- CwiseBinaryOp -------------------- +// this is a binary expression template -struct evaluator_impl > +struct evaluator > + : public binary_evaluator > { typedef CwiseBinaryOp XprType; + typedef binary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; - evaluator_impl(const XprType& xpr) +template +struct binary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseBinaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( AlignedBit + | (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? 
PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) + }; + + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) @@ -487,12 +473,12 @@ struct evaluator_impl > typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } @@ -501,14 +487,14 @@ struct evaluator_impl > PacketScalar packet(Index row, Index col) const { return m_functor.packetOp(m_lhsImpl.template packet(row, col), - m_rhsImpl.template packet(row, col)); + m_rhsImpl.template packet(row, col)); } template PacketScalar packet(Index index) const { return m_functor.packetOp(m_lhsImpl.template packet(index), - m_rhsImpl.template packet(index)); + m_rhsImpl.template packet(index)); } protected: @@ -520,12 +506,18 @@ protected: // -------------------- CwiseUnaryView -------------------- template -struct evaluator_impl > - : evaluator_impl_base > +struct unary_evaluator, IndexBased> + : evaluator_base > { typedef CwiseUnaryView XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)) + }; - evaluator_impl(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_unaryOp(op.functor()), m_argImpl(op.nestedExpression()) { } @@ -534,22 +526,22 @@ struct evaluator_impl > typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + 
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_unaryOp(m_argImpl.coeff(row, col)); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_unaryOp(m_argImpl.coeff(index)); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_unaryOp(m_argImpl.coeffRef(row, col)); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_unaryOp(m_argImpl.coeffRef(index)); } @@ -561,13 +553,15 @@ protected: // -------------------- Map -------------------- -template -struct evaluator_impl > - : evaluator_impl_base -{ - typedef MapBase MapType; - typedef Derived XprType; +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? +// but that might complicate template specialization +template +struct mapbase_evaluator; +template +struct mapbase_evaluator : evaluator_base +{ + typedef Derived XprType; typedef typename XprType::PointerType PointerType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -575,81 +569,121 @@ struct evaluator_impl > typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; - evaluator_impl(const XprType& map) - : m_data(const_cast(map.data())), - m_rowStride(map.rowStride()), - m_colStride(map.colStride()) - { } - enum { - RowsAtCompileTime = XprType::RowsAtCompileTime + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits::ReadCost }; + + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast(map.data())), + m_xpr(map) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + } - CoeffReturnType coeff(Index row, Index col) const - { - return 
m_data[col * m_colStride + row * m_rowStride]; + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - CoeffReturnType coeff(Index index) const - { - return coeff(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_data[index * m_xpr.innerStride()]; } - Scalar& coeffRef(Index row, Index col) - { - return m_data[col * m_colStride + row * m_rowStride]; + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } - Scalar& coeffRef(Index index) - { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_data[index * m_xpr.innerStride()]; } template PacketReturnType packet(Index row, Index col) const - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::ploadt(ptr); } template PacketReturnType packet(Index index) const - { - return packet(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return internal::ploadt(m_data + index * m_xpr.innerStride()); } template void writePacket(Index row, Index col, const PacketScalar& x) - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::pstoret(ptr, x); } template void writePacket(Index index, const PacketScalar& x) - { - return writePacket(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? 
index : 0, - x); + { + internal::pstoret(m_data + index * m_xpr.innerStride(), x); } protected: PointerType m_data; - int m_rowStride; - int m_colStride; + const XprType& m_xpr; }; template -struct evaluator_impl > - : public evaluator_impl > > +struct evaluator > + : public mapbase_evaluator, PlainObjectType> { typedef Map XprType; + typedef typename XprType::Scalar Scalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + KeepsPacketAccess = bool(HasNoInnerStride) + && ( bool(IsDynamicSize) + || HasNoOuterStride + || ( OuterStrideAtCompileTime!=Dynamic + && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), + Flags0 = evaluator::Flags, + Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), + Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) + ? int(Flags1) : int(Flags1 & ~LinearAccessBit), + Flags = KeepsPacketAccess ? 
int(Flags2) : (int(Flags2) & ~PacketAccessBit) + }; - evaluator_impl(const XprType& map) - : evaluator_impl >(map) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) + : mapbase_evaluator(map) + { } +}; + +// -------------------- Ref -------------------- + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Ref XprType; + + enum { + Flags = evaluator >::Flags + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) + : mapbase_evaluator(ref) { } }; @@ -659,21 +693,68 @@ template::ret> struct block_evaluator; template -struct evaluator_impl > +struct evaluator > : block_evaluator { typedef Block XprType; + typedef typename XprType::Scalar Scalar; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) + && (InnerStrideAtCompileTime == 1) + ? PacketAccessBit : 0, + + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? 
AlignedBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit | + MaskAlignedBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit + }; typedef block_evaluator block_evaluator_type; - evaluator_impl(const XprType& block) : block_evaluator_type(block) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} }; +// no direct-access => dispatch to a unary evaluator template struct block_evaluator - : evaluator_impl_base > + : unary_evaluator > { typedef Block XprType; - block_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : unary_evaluator(block) + {} +}; + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), m_startCol(block.startCol()) @@ -689,26 +770,24 @@ struct block_evaluator @@ -721,7 +800,7 @@ struct block_evaluator(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + RowsAtCompileTime == 1 ? index : 0); } template @@ -734,8 +813,8 @@ struct block_evaluator(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + RowsAtCompileTime == 1 ? 
index : 0, + x); } protected: @@ -749,24 +828,38 @@ protected: template struct block_evaluator - : evaluator_impl > > + : mapbase_evaluator, + typename Block::PlainObject> { typedef Block XprType; - block_evaluator(const XprType& block) - : evaluator_impl >(block) - { } + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : mapbase_evaluator(block) + { + // FIXME this should be an internal assertion + eigen_assert(EIGEN_IMPLIES(evaluator::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); + } }; // -------------------- Select -------------------- +// TODO shall we introduce a ternary_evaluator? +// TODO enable vectorization for Select template -struct evaluator_impl > +struct evaluator > + : evaluator_base > { typedef Select XprType; + enum { + CoeffReadCost = evaluator::CoeffReadCost + + EIGEN_SIZE_MAX(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), - evaluator_impl(const XprType& select) + Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits + }; + + inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) @@ -775,7 +868,7 @@ struct evaluator_impl -struct evaluator_impl > +struct unary_evaluator > + : evaluator_base > { typedef Replicate XprType; - - evaluator_impl(const XprType& replicate) - : m_argImpl(replicate.nestedExpression()), - m_rows(replicate.nestedExpression().rows()), - m_cols(replicate.nestedExpression().cols()) - { } - typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketReturnType PacketReturnType; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? 
Dynamic : RowFactor*ColFactor + }; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (evaluator::Flags & HereditaryBits & ~RowMajorBit) | (traits::Flags & RowMajorBit) + }; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + {} + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 @@ -842,9 +947,10 @@ struct evaluator_impl > } protected: - typename evaluator::nestedType m_argImpl; - const variable_if_dynamic m_rows; - const variable_if_dynamic m_cols; + const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? (we have the same situation in evaluator_product) + typename evaluator::nestedType m_argImpl; + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; }; @@ -855,23 +961,35 @@ protected: // the row() and col() member functions. template< typename ArgType, typename MemberOp, int Direction> -struct evaluator_impl > +struct evaluator > + : evaluator_base > { typedef PartialReduxExpr XprType; + typedef typename XprType::Scalar InputScalar; + enum { + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(XprType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost CostOpType; + enum { + CoeffReadCost = TraversalSize==Dynamic ? 
Dynamic + : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), + + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits) + }; - evaluator_impl(const XprType expr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr) : m_expr(expr) - { } + {} typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_expr.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_expr.coeff(index); } @@ -883,16 +1001,20 @@ protected: // -------------------- MatrixWrapper and ArrayWrapper -------------------- // -// evaluator_impl_wrapper_base is a common base class for the +// evaluator_wrapper_base is a common base class for the // MatrixWrapper and ArrayWrapper evaluators. template -struct evaluator_impl_wrapper_base - : evaluator_impl_base +struct evaluator_wrapper_base + : evaluator_base { typedef typename remove_all::type ArgType; + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags + }; - evaluator_impl_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} typedef typename ArgType::Index Index; typedef typename ArgType::Scalar Scalar; @@ -900,22 +1022,22 @@ struct evaluator_impl_wrapper_base typedef typename ArgType::PacketScalar PacketScalar; typedef typename ArgType::PacketReturnType PacketReturnType; - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(row, col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index 
row, Index col) { return m_argImpl.coeffRef(row, col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -949,24 +1071,24 @@ protected: }; template -struct evaluator_impl > - : evaluator_impl_wrapper_base > +struct unary_evaluator > + : evaluator_wrapper_base > { typedef MatrixWrapper XprType; - evaluator_impl(const XprType& wrapper) - : evaluator_impl_wrapper_base >(wrapper.nestedExpression()) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) { } }; template -struct evaluator_impl > - : evaluator_impl_wrapper_base > +struct unary_evaluator > + : evaluator_wrapper_base > { typedef ArrayWrapper XprType; - evaluator_impl(const XprType& wrapper) - : evaluator_impl_wrapper_base >(wrapper.nestedExpression()) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) { } }; @@ -977,8 +1099,8 @@ struct evaluator_impl > template struct reverse_packet_cond; template -struct evaluator_impl > - : evaluator_impl_base > +struct unary_evaluator > + : evaluator_base > { typedef Reverse XprType; typedef typename XprType::Index Index; @@ -997,34 +1119,44 @@ struct evaluator_impl > OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, ReversePacket = (Direction == BothDirections) || ((Direction == Vertical) && IsColMajor) - || ((Direction == Horizontal) && IsRowMajor) + || ((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + ? 
LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess) }; typedef internal::reverse_packet_cond reverse_packet; - evaluator_impl(const XprType& reverse) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) : m_argImpl(reverse.nestedExpression()), m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0), m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0) { } - CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); } - Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? 
m_cols.value() - col - 1 : col); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); } @@ -1071,36 +1203,44 @@ protected: // -------------------- Diagonal -------------------- template -struct evaluator_impl > - : evaluator_impl_base > +struct evaluator > + : evaluator_base > { typedef Diagonal XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (unsigned int)evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit + }; - evaluator_impl(const XprType& diagonal) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) : m_argImpl(diagonal.nestedExpression()), m_index(diagonal.index()) { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + // FIXME having to check whether ArgType is sparse here i not very nice. + typedef typename internal::conditional::value, + typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; - CoeffReturnType coeff(Index row, Index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const { return m_argImpl.coeff(row + rowOffset(), row + colOffset()); } - CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index + rowOffset(), index + colOffset()); } - Scalar& coeffRef(Index row, Index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) { return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); } - Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); } @@ -1110,8 +1250,88 @@ protected: const internal::variable_if_dynamicindex m_index; private: - EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 
0 : -m_index.value(); } - EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } +}; + + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template class EvalToTemp; + +template +struct traits > + : public traits +{ }; + +template +class EvalToTemp + : public dense_xpr_base >::type +{ + public: + + typedef typename dense_xpr_base::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) + : m_arg(arg) + { } + + const ArgType& arg() const + { + return m_arg; + } + + Index rows() const + { + return m_arg.rows(); + } + + Index cols() const + { + return m_arg.cols(); + } + + private: + const ArgType& m_arg; +}; + +template +struct evaluator > + : public evaluator::type +{ + typedef EvalToTemp XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef typename evaluator::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, xpr.arg()); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) + : m_result(arg.rows(), arg.cols()) + { + ::new (static_cast(this)) Base(m_result); + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, 
arg); + } + +protected: + PlainObject m_result; }; } // namespace internal diff --git a/Eigen/src/Core/CoreIterators.h b/Eigen/src/Core/CoreIterators.h index 6da4683d2..7feebc4e4 100644 --- a/Eigen/src/Core/CoreIterators.h +++ b/Eigen/src/Core/CoreIterators.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,47 +15,116 @@ namespace Eigen { /* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core */ -/** \ingroup SparseCore_Module - * \class InnerIterator - * \brief An InnerIterator allows to loop over the element of a sparse (or dense) matrix or expression - * - * todo +namespace internal { + +template +class inner_iterator_selector; + +} + +/** \class InnerIterator + * \brief An InnerIterator allows to loop over the element of any matrix expression. + * + * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed. + * + * TODO: add a usage example */ - -// generic version for dense matrix and expressions -template class DenseBase::InnerIterator +template +class InnerIterator { - protected: - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - - enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit }; - public: - EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer) - : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize()) - {} - - EIGEN_STRONG_INLINE Scalar value() const - { - return (IsRowMajor) ? 
m_expression.coeff(m_outer, m_inner) - : m_expression.coeff(m_inner, m_outer); - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; } - - EIGEN_STRONG_INLINE Index index() const { return m_inner; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } - - EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } - - protected: - const Derived& m_expression; - Index m_inner; - const Index m_outer; - const Index m_end; +protected: + typedef internal::inner_iterator_selector::Kind> IteratorType; + typedef typename internal::evaluator::type EvaluatorType; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::Index Index; +public: + /** Construct an iterator over the \a outerId -th row or column of \a xpr */ + InnerIterator(const XprType &xpr, const Index &outerId) + : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize()) + {} + + /// \returns the value of the current coefficient. + EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); } + /** Increment the iterator \c *this to the next non-zero coefficient. + * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView + */ + EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } + /// \returns the column or row index of the current coefficient. + EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } + /// \returns the row index of the current coefficient. + EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); } + /// \returns the column index of the current coefficient. + EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); } + /// \returns \c true if the iterator \c *this still references a valid coefficient. 
+ EIGEN_STRONG_INLINE operator bool() const { return m_iter; } + +protected: + EvaluatorType m_eval; + IteratorType m_iter; +private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix A; + // SparseMatrix::InnerIterator it(A,0); + template InnerIterator(const EigenBase&,Index outer); }; +namespace internal { + +// Generic inner iterator implementation for dense objects +template +class inner_iterator_selector +{ +protected: + typedef typename evaluator::type EvaluatorType; + typedef typename traits::Scalar Scalar; + typedef typename traits::Index Index; + enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit }; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize) + : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize) + {} + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner) + : m_eval.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? 
index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + +protected: + const EvaluatorType& m_eval; + Index m_inner; + const Index m_outer; + const Index m_end; +}; + +// For iterator-based evaluator, inner-iterator is already implemented as +// evaluator<>::InnerIterator +template +class inner_iterator_selector + : public evaluator::InnerIterator +{ +protected: + typedef typename evaluator::InnerIterator Base; + typedef typename evaluator::type EvaluatorType; + typedef typename traits::Index Index; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/) + : Base(eval, outerId) + {} +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_COREITERATORS_H diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index e20daacc8..a205c3f10 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -56,8 +56,9 @@ struct traits > typename Rhs::Scalar ) >::type Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; + typedef typename cwise_promote_storage_type::StorageKind, + typename traits::StorageKind, + BinaryOp>::ret StorageKind; typedef typename promote_index_type::Index, typename traits::Index>::type Index; typedef typename Lhs::Nested LhsNested; @@ -65,60 +66,37 @@ struct traits > typedef typename remove_reference::type _LhsNested; typedef typename remove_reference::type _RhsNested; enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - SameType = is_same::value, - StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit), - Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( - HereditaryBits - | (int(LhsFlags) & int(RhsFlags) & - ( AlignedBit - | (StorageOrdersAgree ? LinearAccessBit : 0) - | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) - ) - ) - ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), - CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits::Cost + Flags = _LhsNested::Flags & RowMajorBit }; }; } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. 
-// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. -// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to -// add together a float matrix and a double matrix. -#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ - ? int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same::value)), \ - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - template class CwiseBinaryOpImpl; -template -class CwiseBinaryOp : internal::no_assignment_operator, +template +class CwiseBinaryOp : public CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type::StorageKind, - typename internal::traits::StorageKind>::ret> + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>, + internal::no_assignment_operator { public: + + typedef typename internal::remove_all::type Lhs; + typedef typename internal::remove_all::type Rhs; typedef typename CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type::StorageKind, - typename internal::traits::StorageKind>::ret>::Base Base; + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) - typedef typename internal::nested::type LhsNested; - typedef typename internal::nested::type RhsNested; + typedef typename internal::nested::type LhsNested; + typedef typename internal::nested::type RhsNested; typedef typename internal::remove_reference::type _LhsNested; typedef typename internal::remove_reference::type _RhsNested; @@ -165,43 +143,13 @@ class CwiseBinaryOp : 
internal::no_assignment_operator, const BinaryOp m_functor; }; -template -class CwiseBinaryOpImpl - : public internal::dense_xpr_base >::type +// Generic API dispatcher +template +class CwiseBinaryOpImpl + : public internal::generic_xpr_base >::type { - typedef CwiseBinaryOp Derived; - public: - - typedef typename internal::dense_xpr_base >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE( Derived ) - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().lhs().coeff(rowId, colId), - derived().rhs().coeff(rowId, colId)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().lhs().template packet(rowId, colId), - derived().rhs().template packet(rowId, colId)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().lhs().coeff(index), - derived().rhs().coeff(index)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().lhs().template packet(index), - derived().rhs().template packet(index)); - } +public: + typedef typename internal::generic_xpr_base >::type Base; }; /** replaces \c *this by \c *this - \a other. 
@@ -213,8 +161,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -227,8 +174,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 124383114..05c4fedd0 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -35,19 +35,13 @@ template struct traits > : traits { enum { - Flags = (traits::Flags - & ( HereditaryBits - | (functor_has_linear_access::ret ? LinearAccessBit : 0) - | (functor_traits::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), - CoeffReadCost = functor_traits::Cost + Flags = traits::Flags & RowMajorBit }; }; } template -class CwiseNullaryOp : internal::no_assignment_operator, - public internal::dense_xpr_base< CwiseNullaryOp >::type +class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp >::type, internal::no_assignment_operator { public: diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index aa7df197f..da1d1992d 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -44,10 +44,7 @@ struct traits > typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference::type _XprTypeNested; enum { - Flags = _XprTypeNested::Flags & ( - HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits::PacketAccess ? PacketAccessBit : 0)), - CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits::Cost + Flags = _XprTypeNested::Flags & RowMajorBit }; }; } @@ -56,16 +53,16 @@ template class CwiseUnaryOpImpl; template -class CwiseUnaryOp : internal::no_assignment_operator, - public CwiseUnaryOpImpl::StorageKind> +class CwiseUnaryOp : public CwiseUnaryOpImpl::StorageKind>, internal::no_assignment_operator { public: typedef typename CwiseUnaryOpImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::remove_all::type NestedExpression; EIGEN_DEVICE_FUNC - inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) : m_xpr(xpr), m_functor(func) {} EIGEN_DEVICE_FUNC @@ -92,42 +89,13 @@ class CwiseUnaryOp : internal::no_assignment_operator, const UnaryOp m_functor; }; -// This is the generic implementation for dense storage. -// It can be used for any expression types implementing the dense concept. 
-template -class CwiseUnaryOpImpl - : public internal::dense_xpr_base >::type +// Generic API dispatcher +template +class CwiseUnaryOpImpl + : public internal::generic_xpr_base >::type { - public: - - typedef CwiseUnaryOp Derived; - typedef typename internal::dense_xpr_base >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().nestedExpression().coeff(rowId, colId)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet(rowId, colId)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet(index)); - } +public: + typedef typename internal::generic_xpr_base >::type Base; }; } // end namespace Eigen diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index b2638d326..6680f32dd 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h @@ -37,8 +37,8 @@ struct traits > typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all::type _MatrixTypeNested; enum { - Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), - CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits::Cost, + FlagsLvalueBit = is_lvalue::value ? 
LvalueBit : 0, + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions MatrixTypeInnerStride = inner_stride_at_compile_time::ret, // need to cast the sizeof's from size_t to int explicitly, otherwise: // "error: no integral type can represent all of the enumerator values @@ -62,8 +62,9 @@ class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::remove_all::type NestedExpression; - inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) + explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) : m_matrix(mat), m_functor(func) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) @@ -88,6 +89,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl +class CwiseUnaryViewImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + template class CwiseUnaryViewImpl : public internal::dense_xpr_base< CwiseUnaryView >::type @@ -100,38 +110,18 @@ class CwiseUnaryViewImpl EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) - inline Scalar* data() { return &coeffRef(0); } - inline const Scalar* data() const { return &coeff(0); } + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index 
row, Index col) const - { - return derived().functor()(derived().nestedExpression().coeff(row, col)); - } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index)); - } }; } // end namespace Eigen diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index bd5dd14ed..e81b58481 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -50,7 +50,11 @@ template class DenseBase using internal::special_scalar_op_base::Scalar, typename NumTraits::Scalar>::Real>::operator*; - class InnerIterator; + + /** Inner iterator type to iterate over the coefficients of a row or column. + * \sa class InnerIterator + */ + typedef Eigen::InnerIterator InnerIterator; typedef typename internal::traits::StorageKind StorageKind; @@ -74,16 +78,6 @@ template class DenseBase using Base::colIndexByOuterInner; using Base::coeff; using Base::coeffByOuterInner; - using Base::packet; - using Base::packetByOuterInner; - using Base::writePacket; - using Base::writePacketByOuterInner; - using Base::coeffRef; - using Base::coeffRefByOuterInner; - using Base::copyCoeff; - using Base::copyCoeffByOuterInner; - using Base::copyPacket; - using Base::copyPacketByOuterInner; using Base::operator(); using Base::operator[]; using Base::x; @@ -169,16 +163,11 @@ template class DenseBase InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), - CoeffReadCost = internal::traits::CoeffReadCost, - /**< This is a rough measure of how expensive it is to read one coefficient from - * this expression. 
- */ - InnerStrideAtCompileTime = internal::inner_stride_at_compile_time::ret, OuterStrideAtCompileTime = internal::outer_stride_at_compile_time::ret }; - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 0 }; /** \returns the number of nonzero coefficients which is in practice the number * of stored coefficients. */ @@ -278,7 +267,8 @@ template class DenseBase Derived& operator=(const ReturnByValue& func); #ifndef EIGEN_PARSED_BY_DOXYGEN - /** Copies \a other into *this without evaluating other. \returns a reference to *this. */ + /** Copies \a other into *this without evaluating other. \returns a reference to *this. + * \deprecated */ template EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase& other); @@ -287,27 +277,24 @@ template class DenseBase EIGEN_DEVICE_FUNC CommaInitializer operator<< (const Scalar& s); + // TODO flagged is temporarly disabled. It seems useless now template - const Flagged flagged() const; + EIGEN_DEPRECATED + const Derived& flagged() const + { return derived(); } template EIGEN_DEVICE_FUNC CommaInitializer operator<< (const DenseBase& other); + typedef Transpose TransposeReturnType; EIGEN_DEVICE_FUNC - Eigen::Transpose transpose(); + TransposeReturnType transpose(); typedef typename internal::add_const >::type ConstTransposeReturnType; EIGEN_DEVICE_FUNC ConstTransposeReturnType transpose() const; EIGEN_DEVICE_FUNC void transposeInPlace(); -#ifndef EIGEN_NO_DEBUG - protected: - template - void checkTransposeAliasing(const OtherDerived& other) const; - public: -#endif - EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index rows, Index cols, const Scalar& value); @@ -387,16 +374,17 @@ template class DenseBase // size types on MSVC. return typename internal::eval::type(derived()); } - + /** swaps *this with the expression \a other. 
* */ template EIGEN_DEVICE_FUNC - void swap(const DenseBase& other, - int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase) + void swap(const DenseBase& other) { - SwapWrapper(derived()).lazyAssign(other.derived()); + EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); } /** swaps *this with the matrix or array \a other. @@ -406,10 +394,10 @@ template class DenseBase EIGEN_DEVICE_FUNC void swap(PlainObjectBase& other) { - SwapWrapper(derived()).lazyAssign(other.derived()); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.derived(), internal::swap_assign_op()); } - EIGEN_DEVICE_FUNC inline const NestByValue nestByValue() const; EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 4e986e875..a9e4dbaf9 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -97,8 +97,8 @@ class DenseCoeffsBase : public EigenBase EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeff(row, col); + && col >= 0 && col < cols()); + return typename internal::evaluator::type(derived()).coeff(row,col); } EIGEN_DEVICE_FUNC @@ -117,7 +117,7 @@ class DenseCoeffsBase : public EigenBase { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeff(row, col); + return coeff(row, col); } /** Short version: don't use this function, use @@ -140,7 +140,7 @@ class DenseCoeffsBase : public EigenBase coeff(Index index) const { eigen_internal_assert(index >= 0 && index < size()); - return 
derived().coeff(index); + return typename internal::evaluator::type(derived()).coeff(index); } @@ -159,7 +159,7 @@ class DenseCoeffsBase : public EigenBase EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** \returns the coefficient at given index. @@ -177,7 +177,7 @@ class DenseCoeffsBase : public EigenBase operator()(Index index) const { eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** equivalent to operator[](0). */ @@ -217,9 +217,8 @@ class DenseCoeffsBase : public EigenBase template EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().template packet(row,col); + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return typename internal::evaluator::type(derived()).template packet(row,col); } @@ -245,7 +244,7 @@ class DenseCoeffsBase : public EigenBase EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { eigen_internal_assert(index >= 0 && index < size()); - return derived().template packet(index); + return typename internal::evaluator::type(derived()).template packet(index); } protected: @@ -325,8 +324,8 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + && col >= 0 && col < cols()); + return typename internal::evaluator::type(derived()).coeffRef(row,col); } EIGEN_DEVICE_FUNC @@ -348,7 +347,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + return coeffRef(row, col); } @@ -372,7 +371,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return 
derived().coeffRef(index); + return typename internal::evaluator::type(derived()).coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -389,7 +388,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -406,7 +405,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** equivalent to operator[](0). */ @@ -432,144 +431,6 @@ class DenseCoeffsBase : public DenseCoeffsBase - EIGEN_STRONG_INLINE void writePacket - (Index row, Index col, const typename internal::packet_traits::type& val) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket(row,col,val); - } - - - /** \internal */ - template - EIGEN_STRONG_INLINE void writePacketByOuterInner - (Index outer, Index inner, const typename internal::packet_traits::type& val) - { - writePacket(rowIndexByOuterInner(outer, inner), - colIndexByOuterInner(outer, inner), - val); - } - - /** \internal - * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility - * to ensure that a packet really starts there. This method is only available on expressions having the - * PacketAccessBit and the LinearAccessBit. - * - * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. 
- */ - template - EIGEN_STRONG_INLINE void writePacket - (Index index, const typename internal::packet_traits::type& val) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket(index,val); - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - - /** \internal Copies the coefficient at position (row,col) of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().coeffRef(row, col) = other.derived().coeff(row, col); - } - - /** \internal Copies the coefficient at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().coeffRef(index) = other.derived().coeff(index); - } - - - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().copyCoeff(row, col, other); - } - - /** \internal Copies the packet at position (row,col) of other into *this. 
- * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket(row, col, - other.derived().template packet(row, col)); - } - - /** \internal Copies the packet at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket(index, - other.derived().template packet(index)); - } - - /** \internal */ - template - EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other); - } -#endif - }; /** \brief Base class providing direct read-only coefficient access to matrices and arrays. 
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index 59f515495..852648639 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -130,7 +130,7 @@ template class DenseSt public: EIGEN_DEVICE_FUNC DenseStorage() {} EIGEN_DEVICE_FUNC - DenseStorage(internal::constructor_without_unaligned_array_assert) + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()) {} EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} @@ -155,7 +155,7 @@ template class DenseStorage class DenseStorage class DenseStorage class DenseStorage class DenseStorage(size)), m_rows(nbRows), m_cols(nbCols) @@ -350,7 +350,7 @@ template class DenseStorage(size)), m_cols(nbCols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } DenseStorage(const DenseStorage& other) @@ -416,7 +416,7 @@ template class DenseStorage(size)), m_rows(nbRows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } DenseStorage(const DenseStorage& other) diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h index b160479ab..33b82f90f 100644 --- a/Eigen/src/Core/Diagonal.h +++ b/Eigen/src/Core/Diagonal.h @@ -52,8 +52,7 @@ struct traits > MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), MaxColsAtCompileTime = 1, MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, - Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions MatrixTypeOuterStride = outer_stride_at_compile_time::ret, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? 
Dynamic : MatrixTypeOuterStride+1, OuterStrideAtCompileTime = 0 @@ -71,17 +70,15 @@ template class Diagonal EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) EIGEN_DEVICE_FUNC - inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) EIGEN_DEVICE_FUNC inline Index rows() const { - EIGEN_USING_STD_MATH(min); - return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value())) - : (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value())); - + return m_index.value()<0 ? numext::mini(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value())) + : numext::mini(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value())); } EIGEN_DEVICE_FUNC @@ -149,14 +146,14 @@ template class Diagonal } EIGEN_DEVICE_FUNC - const typename internal::remove_all::type& + inline const typename internal::remove_all::type& nestedExpression() const { return m_matrix; } EIGEN_DEVICE_FUNC - int index() const + inline Index index() const { return m_index.value(); } @@ -173,7 +170,7 @@ template class Diagonal EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; } - // triger a compile time error is someone try to call packet + // trigger a compile time error is someone try to call packet template typename MatrixType::PacketReturnType packet(Index) const; template typename MatrixType::PacketReturnType packet(Index,Index) const; }; @@ -190,7 +187,7 @@ template inline typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { - return derived(); + return DiagonalReturnType(derived()); } /** This is the const version of diagonal(). 
*/ @@ -213,18 +210,18 @@ MatrixBase::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template -inline typename MatrixBase::template DiagonalIndexReturnType::Type +inline typename MatrixBase::DiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) { - return typename DiagonalIndexReturnType::Type(derived(), index); + return DiagonalDynamicIndexReturnType(derived(), index); } /** This is the const version of diagonal(Index). */ template -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) const { - return typename ConstDiagonalIndexReturnType::Type(derived(), index); + return ConstDiagonalDynamicIndexReturnType(derived(), index); } /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this @@ -239,20 +236,20 @@ MatrixBase::diagonal(Index index) const * * \sa MatrixBase::diagonal(), class Diagonal */ template -template -inline typename MatrixBase::template DiagonalIndexReturnType::Type +template +inline typename MatrixBase::template DiagonalIndexReturnType::Type MatrixBase::diagonal() { - return derived(); + return typename DiagonalIndexReturnType::Type(derived()); } /** This is the const version of diagonal(). 
*/ template -template -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +template +inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type MatrixBase::diagonal() const { - return derived(); + return typename ConstDiagonalIndexReturnType::Type(derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 96b65483d..49b9b7925 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -30,7 +30,7 @@ class DiagonalBase : public EigenBase MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, IsVectorAtCompileTime = 0, - Flags = 0 + Flags = NoPreferredStorageOrderBit }; typedef Matrix DenseMatrixType; @@ -44,18 +44,7 @@ class DiagonalBase : public EigenBase EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - template - EIGEN_DEVICE_FUNC - void evalTo(MatrixBase &other) const; - template - EIGEN_DEVICE_FUNC - void addTo(MatrixBase &other) const - { other.diagonal() += diagonal(); } - template - EIGEN_DEVICE_FUNC - void subTo(MatrixBase &other) const - { other.diagonal() -= diagonal(); } - + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } EIGEN_DEVICE_FUNC @@ -66,44 +55,37 @@ class DiagonalBase : public EigenBase EIGEN_DEVICE_FUNC inline Index cols() const { return diagonal().size(); } - /** \returns the diagonal matrix product of \c *this by the matrix \a matrix. 
- */ template EIGEN_DEVICE_FUNC - const DiagonalProduct + const Product operator*(const MatrixBase &matrix) const { - return DiagonalProduct(matrix.derived(), derived()); + return Product(derived(),matrix.derived()); } + typedef DiagonalWrapper, const DiagonalVectorType> > InverseReturnType; EIGEN_DEVICE_FUNC - inline const DiagonalWrapper, const DiagonalVectorType> > + inline const InverseReturnType inverse() const { - return diagonal().cwiseInverse(); + return InverseReturnType(diagonal().cwiseInverse()); } + typedef DiagonalWrapper, const DiagonalVectorType> > ScalarMultipleReturnType; EIGEN_DEVICE_FUNC - inline const DiagonalWrapper, const DiagonalVectorType> > + inline const ScalarMultipleReturnType operator*(const Scalar& scalar) const { - return diagonal() * scalar; + return ScalarMultipleReturnType(diagonal() * scalar); } EIGEN_DEVICE_FUNC - friend inline const DiagonalWrapper, const DiagonalVectorType> > + friend inline const ScalarMultipleReturnType operator*(const Scalar& scalar, const DiagonalBase& other) { - return other.diagonal() * scalar; + return ScalarMultipleReturnType(other.diagonal() * scalar); } }; -template -template -void DiagonalBase::evalTo(MatrixBase &other) const -{ - other.setZero(); - other.diagonal() = diagonal(); -} #endif /** \class DiagonalMatrix @@ -125,10 +107,10 @@ struct traits > : traits > { typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; - typedef Dense StorageKind; + typedef DiagonalShape StorageKind; typedef DenseIndex Index; enum { - Flags = LvalueBit + Flags = LvalueBit | NoPreferredStorageOrderBit }; }; } @@ -164,7 +146,7 @@ class DiagonalMatrix /** Constructs a diagonal matrix with given dimension */ EIGEN_DEVICE_FUNC - inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} /** 2D constructor. 
*/ EIGEN_DEVICE_FUNC @@ -249,13 +231,14 @@ struct traits > typedef _DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::Index Index; - typedef typename DiagonalVectorType::StorageKind StorageKind; + typedef DiagonalShape StorageKind; + typedef typename traits::XprKind XprKind; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - Flags = traits::Flags & LvalueBit + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit }; }; } @@ -272,7 +255,7 @@ class DiagonalWrapper /** Constructor from expression of diagonal coefficients to wrap. */ EIGEN_DEVICE_FUNC - inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} /** \returns a const reference to the wrapped expression of diagonal coefficients. 
*/ EIGEN_DEVICE_FUNC @@ -295,7 +278,7 @@ template inline const DiagonalWrapper MatrixBase::asDiagonal() const { - return derived(); + return DiagonalWrapper(derived()); } /** \returns true if *this is approximately equal to a diagonal matrix, @@ -326,6 +309,33 @@ bool MatrixBase::isDiagonal(const RealScalar& prec) const return true; } +namespace internal { + +template<> struct storage_kind_to_shape { typedef DiagonalShape Shape; }; + +struct Diagonal2Dense {}; + +template<> struct AssignmentKind { typedef Diagonal2Dense Kind; }; + +// Diagonal matrix to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + dst.setZero(); + dst.diagonal() = src.diagonal(); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { dst.diagonal() += src.diagonal(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { dst.diagonal() -= src.diagonal(); } +}; + +} // namespace internal + } // end namespace Eigen #endif // EIGEN_DIAGONALMATRIX_H diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h index c03a0c2e1..d372b938f 100644 --- a/Eigen/src/Core/DiagonalProduct.h +++ b/Eigen/src/Core/DiagonalProduct.h @@ -13,116 +13,14 @@ namespace Eigen { -namespace internal { -template -struct traits > - : traits -{ - typedef typename scalar_product_traits::ReturnType Scalar; - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - _StorageOrder = MatrixType::Flags & RowMajorBit ? 
RowMajor : ColMajor, - _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) - ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), - _SameTypes = is_same::value, - // FIXME currently we need same types, but in the future the next rule should be the one - //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, - - Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), - CoeffReadCost = NumTraits::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost - }; -}; -} - -template -class DiagonalProduct : internal::no_assignment_operator, - public MatrixBase > -{ - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct) - - inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal) - : m_matrix(matrix), m_diagonal(diagonal) - { - eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols())); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } - - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const - { - return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? 
RowMajor : ColMajor - }; - return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const - { - enum { - StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor - }; - const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col; - return packet_impl(row,col,indexInDiagonalVector,typename internal::conditional< - ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) - ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type()); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor - }; - return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - protected: - template - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const - { - return internal::pmul(m_matrix.template packet(row, col), - internal::pset1(m_diagonal.diagonal().coeff(id))); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const - { - enum { - InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) - }; - return internal::pmul(m_matrix.template packet(row, col), - m_diagonal.diagonal().template packet(id)); - } - - typename MatrixType::Nested m_matrix; - typename DiagonalType::Nested m_diagonal; -}; - /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. 
*/ template template -inline const DiagonalProduct +inline const Product MatrixBase::operator*(const DiagonalBase &a_diagonal) const { - return DiagonalProduct(derived(), a_diagonal.derived()); + return Product(derived(),a_diagonal.derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index db16e4acc..68e9c2660 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -113,8 +113,7 @@ template inline const typename MatrixBase::PlainObject MatrixBase::normalized() const { - typedef typename internal::nested::type Nested; - typedef typename internal::remove_reference::type _Nested; + typedef typename internal::nested_eval::type _Nested; _Nested n(derived()); return n / n.norm(); } @@ -206,8 +205,8 @@ template bool MatrixBase::isOrthogonal (const MatrixBase& other, const RealScalar& prec) const { - typename internal::nested::type nested(derived()); - typename internal::nested::type otherNested(other.derived()); + typename internal::nested_eval::type nested(derived()); + typename internal::nested_eval::type otherNested(other.derived()); return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); } diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index 1a577c2dc..52b66e6dc 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -121,7 +121,7 @@ template template Derived& DenseBase::operator=(const EigenBase &other) { - other.derived().evalTo(derived()); + call_assignment(derived(), other.derived()); return derived(); } @@ -129,7 +129,7 @@ template template Derived& DenseBase::operator+=(const EigenBase &other) { - other.derived().addTo(derived()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -137,7 +137,7 @@ template template Derived& DenseBase::operator-=(const EigenBase &other) { - other.derived().subTo(derived()); + call_assignment(derived(), other.derived(), 
internal::sub_assign_op()); return derived(); } diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 1f2955fc1..2e2a50be5 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -48,39 +48,39 @@ template clas ExpressionType, const ExpressionType&>::type ExpressionTypeNested; typedef typename ExpressionType::InnerIterator InnerIterator; - inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} + explicit inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - inline Index outerStride() const { return m_matrix.outerStride(); } - inline Index innerStride() const { return m_matrix.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); } - inline CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const { return m_matrix.coeff(row, col); } - inline CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_matrix.coeff(index); } - inline const Scalar& coeffRef(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const { return m_matrix.const_cast_derived().coeffRef(row, col); } - inline const Scalar& coeffRef(Index index) const + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_matrix.const_cast_derived().coeffRef(index); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_matrix.const_cast_derived().coeffRef(row, col); } - inline Scalar& 
coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_matrix.const_cast_derived().coeffRef(index); } @@ -109,13 +109,13 @@ template clas m_matrix.const_cast_derived().template writePacket(index, x); } - const ExpressionType& _expression() const { return m_matrix; } + EIGEN_DEVICE_FUNC const ExpressionType& _expression() const { return m_matrix; } template - typename ExpressionType::PlainObject solveTriangular(const MatrixBase& other) const; + EIGEN_DEVICE_FUNC typename ExpressionType::PlainObject solveTriangular(const MatrixBase& other) const; template - void solveTriangularInPlace(const MatrixBase& other) const; + EIGEN_DEVICE_FUNC void solveTriangularInPlace(const MatrixBase& other) const; protected: ExpressionTypeNested m_matrix; @@ -132,7 +132,7 @@ template inline const Flagged DenseBase::flagged() const { - return derived(); + return Flagged(derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h index 807c7a293..7b08b45e6 100644 --- a/Eigen/src/Core/ForceAlignedAccess.h +++ b/Eigen/src/Core/ForceAlignedAccess.h @@ -39,29 +39,29 @@ template class ForceAlignedAccess typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) - inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return 
m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -90,7 +90,7 @@ template class ForceAlignedAccess m_expression.const_cast_derived().template writePacket(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType& m_expression; @@ -127,7 +127,7 @@ template inline typename internal::add_const_on_value_type,Derived&>::type>::type MatrixBase::forceAlignedAccessIf() const { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } /** \returns an expression of *this with forced aligned access if \a Enable is true. 
@@ -138,7 +138,7 @@ template inline typename internal::conditional,Derived&>::type MatrixBase::forceAlignedAccessIf() { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } } // end namespace Eigen diff --git a/Eigen/src/Core/Fuzzy.h b/Eigen/src/Core/Fuzzy.h index f9a88dd3c..3e403a09d 100644 --- a/Eigen/src/Core/Fuzzy.h +++ b/Eigen/src/Core/Fuzzy.h @@ -22,10 +22,9 @@ struct isApprox_selector EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) { - EIGEN_USING_STD_MATH(min); - typename internal::nested::type nested(x); - typename internal::nested::type otherNested(y); - return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + typename internal::nested_eval::type nested(x); + typename internal::nested_eval::type otherNested(y); + return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); } }; diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 9d3d5562c..81750722c 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -13,28 +13,6 @@ namespace Eigen { -/** \class GeneralProduct - * \ingroup Core_Module - * - * \brief Expression of the product of two general matrices or vectors - * - * \param LhsNested the type used to store the left-hand side - * \param RhsNested the type used to store the right-hand side - * \param ProductMode the type of the product - * - * This class represents an expression of the product of two general matrices. - * We call a general matrix, a dense matrix with full storage. For instance, - * This excludes triangular, selfadjoint, and sparse matrices. - * It is the return type of the operator* between general matrices. Its template - * arguments are determined automatically by ProductReturnType. 
Therefore, - * GeneralProduct should never be used direclty. To determine the result type of a - * function which involves a matrix product, use ProductReturnType::Type. - * - * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase&) - */ -template::value> -class GeneralProduct; - enum { Large = 2, Small = 3 @@ -59,14 +37,14 @@ template struct product_type typedef typename remove_all::type _Lhs; typedef typename remove_all::type _Rhs; enum { - MaxRows = _Lhs::MaxRowsAtCompileTime, - Rows = _Lhs::RowsAtCompileTime, - MaxCols = _Rhs::MaxColsAtCompileTime, - Cols = _Rhs::ColsAtCompileTime, - MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime, - _Rhs::MaxRowsAtCompileTime), - Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, - _Rhs::RowsAtCompileTime) + MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, + Rows = traits<_Lhs>::RowsAtCompileTime, + MaxCols = traits<_Rhs>::MaxColsAtCompileTime, + Cols = traits<_Rhs>::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, + traits<_Rhs>::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, + traits<_Rhs>::RowsAtCompileTime) }; // the splitting into different lines of code here, introducing the _select enums and the typedef below, @@ -81,7 +59,8 @@ private: public: enum { - value = selector::ret + value = selector::ret, + ret = selector::ret }; #ifdef EIGEN_DEBUG_PRODUCT static void debug() @@ -97,6 +76,31 @@ public: #endif }; +// template struct product_tag +// { +// private: +// +// typedef typename remove_all::type _Lhs; +// typedef typename remove_all::type _Rhs; +// enum { +// Rows = _Lhs::RowsAtCompileTime, +// Cols = _Rhs::ColsAtCompileTime, +// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime) +// }; +// +// enum { +// rows_select = Rows==1 ? int(Rows) : int(Large), +// cols_select = Cols==1 ? int(Cols) : int(Large), +// depth_select = Depth==1 ? 
int(Depth) : int(Large) +// }; +// typedef product_type_selector selector; +// +// public: +// enum { +// ret = selector::ret +// }; +// +// }; /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. @@ -127,54 +131,6 @@ template<> struct product_type_selector { enum } // end namespace internal -/** \class ProductReturnType - * \ingroup Core_Module - * - * \brief Helper class to get the correct and optimized returned type of operator* - * - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * \param ProductMode the type of the product (determined automatically by internal::product_mode) - * - * This class defines the typename Type representing the optimized product expression - * between two matrix expressions. In practice, using ProductReturnType::Type - * is the recommended way to define the result type of a function returning an expression - * which involve a matrix product. The class Product should never be - * used directly. - * - * \sa class Product, MatrixBase::operator*(const MatrixBase&) - */ -template -struct ProductReturnType -{ - // TODO use the nested type to reduce instanciations ???? 
-// typedef typename internal::nested::type LhsNested; -// typedef typename internal::nested::type RhsNested; - - typedef GeneralProduct Type; -}; - -template -struct ProductReturnType -{ - typedef typename internal::nested::type >::type LhsNested; - typedef typename internal::nested::type >::type RhsNested; - typedef CoeffBasedProduct Type; -}; - -template -struct ProductReturnType -{ - typedef typename internal::nested::type >::type LhsNested; - typedef typename internal::nested::type >::type RhsNested; - typedef CoeffBasedProduct Type; -}; - -// this is a workaround for sun CC -template -struct LazyProductReturnType : public ProductReturnType -{}; - /*********************************************************************** * Implementation of Inner Vector Vector Product ***********************************************************************/ @@ -186,119 +142,10 @@ struct LazyProductReturnType : public ProductReturnType with: operator=(Scalar x); -namespace internal { - -template -struct traits > - : traits::ReturnType,1,1> > -{}; - -} - -template -class GeneralProduct - : internal::no_assignment_operator, - public Matrix::ReturnType,1,1> -{ - typedef Matrix::ReturnType,1,1> Base; - public: - GeneralProduct(const Lhs& lhs, const Rhs& rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); - } - - /** Convertion to scalar */ - operator const typename Base::Scalar() const { - return Base::coeff(0,0); - } -}; - /*********************************************************************** * Implementation of Outer Vector Vector Product ***********************************************************************/ -namespace internal { - -// Column major -template -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&) -{ - 
typedef typename Dest::Index Index; - // FIXME make sure lhs is sequentially stored - // FIXME not very good if rhs is real and lhs complex while alpha is real too - const Index cols = dest.cols(); - for (Index j=0; j -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) { - typedef typename Dest::Index Index; - // FIXME make sure rhs is sequentially stored - // FIXME not very good if lhs is real and rhs complex while alpha is real too - const Index rows = dest.rows(); - for (Index i=0; i -struct traits > - : traits, Lhs, Rhs> > -{}; - -} - -template -class GeneralProduct - : public ProductBase, Lhs, Rhs> -{ - template struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; - - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; - struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; - struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; - struct adds { - Scalar m_scale; - adds(const Scalar& s) : m_scale(s) {} - template void operator()(const Dst& dst, const Src& src) const { - dst.const_cast_derived() += m_scale * src; - } - }; - - template - inline void evalTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, set(), IsRowMajor()); - } - - template - inline void addTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, add(), IsRowMajor()); - } - - template - inline void subTo(Dest& dest) const { - 
internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor()); - } - - template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor()); - } -}; - /*********************************************************************** * Implementation of General Matrix Vector Product ***********************************************************************/ @@ -312,60 +159,13 @@ class GeneralProduct */ namespace internal { -template -struct traits > - : traits, Lhs, Rhs> > -{}; - template -struct gemv_selector; +struct gemv_dense_sense_selector; } // end namespace internal -template -class GeneralProduct - : public ProductBase, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - typedef typename Lhs::Scalar LhsScalar; - typedef typename Rhs::Scalar RhsScalar; - - GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs) - { -// EIGEN_STATIC_ASSERT((internal::is_same::value), -// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; - typedef typename internal::conditional::type MatrixType; - - template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const - { - eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); - internal::gemv_selector::HasUsableDirectAccess)>::run(*this, dst, alpha); - } -}; - namespace internal { -// The vector is on the left => transposition -template -struct gemv_selector -{ - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) - { - Transpose destT(dest); - enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; - gemv_selector - ::run(GeneralProduct,Transpose, GemvProduct> - (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); - } -}; - template struct gemv_static_vector_if; template @@ -402,27 +202,43 @@ struct gemv_static_vector_if #endif }; -template<> struct gemv_selector +// The vector is on the left => transposition +template +struct gemv_dense_sense_selector { - template - static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; + gemv_dense_sense_selector + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_dense_sense_selector +{ + template + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef Map, Aligned> MappedDest; - ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); - ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 @@ -477,34 +293,35 @@ template<> struct gemv_selector } }; -template<> struct gemv_selector +template<> struct gemv_dense_sense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef 
typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; - typename add_const::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const::type actualRhs = RhsBlasTraits::extract(prod.rhs()); + typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if static_rhs; + gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? 
const_cast(actualRhs.data()) : static_rhs.data()); @@ -515,7 +332,7 @@ template<> struct gemv_selector Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map(actualRhsPtr, actualRhs.size()) = actualRhs; + Map(actualRhsPtr, actualRhs.size()) = actualRhs; } typedef const_blas_data_mapper LhsMapper; @@ -530,29 +347,29 @@ template<> struct gemv_selector } }; -template<> struct gemv_selector +template<> struct gemv_dense_sense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { typedef typename Dest::Index Index; // TODO makes sure dest is sequentially stored in memory, otherwise use a temp - const Index size = prod.rhs().rows(); + const Index size = rhs.rows(); for(Index k=0; k struct gemv_selector +template<> struct gemv_dense_sense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { typedef typename Dest::Index Index; // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp - const Index rows = prod.rows(); + const Index rows = dest.rows(); for(Index i=0; i struct gemv_selector */ #ifndef __CUDACC__ -#ifdef EIGEN_TEST_EVALUATORS template template inline const Product @@ -601,39 +417,9 @@ MatrixBase::operator*(const MatrixBase &other) const return Product(derived(), other.derived()); } -#else -template -template -inline const typename ProductReturnType::Type -MatrixBase::operator*(const MatrixBase &other) const -{ - // A note regarding the function declaration: In MSVC, this function will sometimes - // not be inlined since DenseStorage is an unwindable object for dynamic - // matrices and product types are holding a member to store the result. 
- // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. - enum { - ProductIsValid = Derived::ColsAtCompileTime==Dynamic - || OtherDerived::RowsAtCompileTime==Dynamic - || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), - AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, - SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) - }; - // note to the lost user: - // * for a dot product use: v1.dot(v2) - // * for a coeff-wise product use: v1.cwiseProduct(v2) - EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) - EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) - EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) -#ifdef EIGEN_DEBUG_PRODUCT - internal::product_type::debug(); -#endif - return typename ProductReturnType::Type(derived(), other.derived()); -} -#endif -#endif +#endif // __CUDACC__ + /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. 
* * The returned product will behave like any other expressions: the coefficients of the product will be @@ -647,7 +433,7 @@ MatrixBase::operator*(const MatrixBase &other) const */ template template -const typename LazyProductReturnType::Type +const Product MatrixBase::lazyProduct(const MatrixBase &other) const { enum { @@ -666,7 +452,7 @@ MatrixBase::lazyProduct(const MatrixBase &other) const INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - return typename LazyProductReturnType::Type(derived(), other.derived()); + return Product(derived(), other.derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 3ef3475c7..8759cd06c 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -129,12 +129,12 @@ pdiv(const Packet& a, /** \internal \returns the min of \a a and \a b (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, - const Packet& b) { EIGEN_USING_STD_MATH(min); return (min)(a, b); } + const Packet& b) { return numext::mini(a, b); } /** \internal \returns the max of \a a and \a b (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, - const Packet& b) { EIGEN_USING_STD_MATH(max); return (max)(a, b); } + const Packet& b) { return numext::maxi(a, b); } /** \internal \returns the absolute value of \a a */ template EIGEN_DEVICE_FUNC inline Packet @@ -245,8 +245,8 @@ template EIGEN_DEVICE_FUNC inline void pstoreu /** \internal tries to do cache prefetching of \a addr */ template inline void prefetch(const Scalar* addr) { -#if !defined(_MSC_VER) -__builtin_prefetch(addr); +#if !EIGEN_COMP_MSVC + __builtin_prefetch(addr); #endif } diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 2067a2a6e..ee67b7d3c 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ 
b/Eigen/src/Core/GlobalFunctions.h @@ -15,7 +15,7 @@ template \ inline const Eigen::CwiseUnaryOp, const Derived> \ NAME(const Eigen::ArrayBase& x) { \ - return x.derived(); \ + return Eigen::CwiseUnaryOp, const Derived>(x.derived()); \ } #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ @@ -30,7 +30,7 @@ { \ static inline typename NAME##_retval >::type run(const Eigen::ArrayBase& x) \ { \ - return x.derived(); \ + return typename NAME##_retval >::type(x.derived()); \ } \ }; diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h new file mode 100644 index 000000000..f3b0dff87 --- /dev/null +++ b/Eigen/src/Core/Inverse.h @@ -0,0 +1,129 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INVERSE_H +#define EIGEN_INVERSE_H + +namespace Eigen { + +// TODO move the general declaration in Core, and rename this file DenseInverseImpl.h, or something like this... + +template class InverseImpl; + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename XprType::PlainObject PlainObject; + typedef traits BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit + }; +}; + +} // end namespace internal + +/** \class Inverse + * + * \brief Expression of the inverse of another expression + * + * \tparam XprType the type of the expression we are taking the inverse + * + * This class represents an abstract expression of A.inverse() + * and most of the time this is the only way it is used. 
+ * + */ +template +class Inverse : public InverseImpl::StorageKind> +{ +public: + typedef typename XprType::Index Index; + typedef typename XprType::PlainObject PlainObject; + typedef typename internal::nested::type XprTypeNested; + typedef typename internal::remove_all::type XprTypeNestedCleaned; + + explicit Inverse(const XprType &xpr) + : m_xpr(xpr) + {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + + EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + +protected: + XprTypeNested m_xpr; +}; + +/** \internal + * Specialization of the Inverse expression for dense expressions. + * Direct access to the coefficients are discared. + * FIXME this intermediate class is probably not needed anymore. + */ +template +class InverseImpl + : public MatrixBase > +{ + typedef Inverse Derived; + +public: + + typedef MatrixBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + typedef typename internal::remove_all::type NestedExpression; + +private: + + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +/** \internal + * \brief Default evaluator for Inverse expression. + * + * This default evaluator for Inverse expression simply evaluate the inverse into a temporary + * by a call to internal::call_assignment_no_alias. + * Therefore, inverse implementers only have to specialize Assignment, ...> for + * there own nested expression. 
+ * + * \sa class Inverse + */ +template +struct unary_evaluator > + : public evaluator::PlainObject>::type +{ + typedef Inverse InverseType; + typedef typename InverseType::PlainObject PlainObject; + typedef typename evaluator::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + unary_evaluator(const InverseType& inv_xpr) + : m_result(inv_xpr.rows(), inv_xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + internal::call_assignment_no_alias(m_result, inv_xpr); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_H diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index ced1b76ba..098f1c096 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -79,22 +79,9 @@ struct traits > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), - HasNoInnerStride = InnerStrideAtCompileTime == 1, - HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, - HasNoStride = HasNoInnerStride && HasNoOuterStride, IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), - IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - KeepsPacketAccess = bool(HasNoInnerStride) - && ( bool(IsDynamicSize) - || HasNoOuterStride - || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ), Flags0 = TraitsBase::Flags & (~NestByRefBit), - Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), - Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags1) : int(Flags1 & ~LinearAccessBit), - Flags3 = is_lvalue::value ? int(Flags2) : (int(Flags2) & ~LvalueBit), - Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit) + Flags = is_lvalue::value ? 
int(Flags0) : (int(Flags0) & ~LvalueBit) }; private: enum { Options }; // Expressions don't have Options @@ -135,7 +122,7 @@ template class Ma * \param a_stride optional Stride object, passing the strides. */ EIGEN_DEVICE_FUNC - inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType()) + explicit inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType()) : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride) { PlainObjectType::Base::_check_template_params(); diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index e8ecb175b..1589cbaae 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -12,7 +12,7 @@ #define EIGEN_MAPBASE_H #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ - EIGEN_STATIC_ASSERT((int(internal::traits::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + EIGEN_STATIC_ASSERT((int(internal::evaluator::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) namespace Eigen { @@ -85,7 +85,7 @@ template class MapBase * * \sa innerStride(), outerStride() */ - inline const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const @@ -128,7 +128,7 @@ template class MapBase } EIGEN_DEVICE_FUNC - inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) + explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) checkSanity(); @@ -161,11 +161,7 @@ template class MapBase EIGEN_DEVICE_FUNC void checkSanity() const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits::Flags&PacketAccessBit, - internal::inner_stride_at_compile_time::ret==1), - 
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); - eigen_assert(EIGEN_IMPLIES(internal::traits::Flags&AlignedBit, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) - && "data is not aligned"); + eigen_assert(EIGEN_IMPLIES(internal::traits::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned"); } PointerType m_data; @@ -176,6 +172,7 @@ template class MapBase template class MapBase : public MapBase { + typedef MapBase ReadOnlyMapBase; public: typedef MapBase Base; @@ -243,11 +240,13 @@ template class MapBase EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) { - Base::Base::operator=(other); + ReadOnlyMapBase::Base::operator=(other); return derived(); } - using Base::Base::operator=; + // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base, + // see bugs 821 and 920. + using ReadOnlyMapBase::Base::operator=; }; #undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index e9fed2e52..16ad2dc7e 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -14,7 +14,7 @@ namespace Eigen { // On WINCE, std::abs is defined for int only, so let's defined our own overloads: // This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too. 
-#if defined(_WIN32_WCE) && defined(_MSC_VER) && _MSC_VER<=1500 +#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500 long abs(long x) { return (labs(x)); } double abs(double x) { return (fabs(x)); } float abs(float x) { return (fabsf(x)); } @@ -360,50 +360,31 @@ inline NewType cast(const OldType& x) } /**************************************************************************** -* Implementation of atanh2 * +* Implementation of logp1 * ****************************************************************************/ template -struct atanh2_impl +struct log1p_impl { - static inline Scalar run(const Scalar& x, const Scalar& r) + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if (__cplusplus >= 201103L) && !defined(__CYGWIN__) + // Let's be conservative and enable the default C++11 implementation only if we are sure it exists + #if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ + && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC) using std::log1p; - return log1p(2 * x / (r - x)) / 2; + return log1p(x); #else - using std::abs; + typedef typename NumTraits::Real RealScalar; using std::log; - using std::sqrt; - Scalar z = x / r; - if (r == 0 || abs(z) > sqrt(NumTraits::epsilon())) - return log((r + x) / (r - x)) / 2; - else - return z + z*z*z / 3; + Scalar x1p = RealScalar(1) + x; + return ( x1p == Scalar(1) ) ? 
x : x * ( log(x1p) / (x1p - RealScalar(1)) ); #endif } }; -template -struct atanh2_impl > -{ - typedef std::complex Scalar; - static inline Scalar run(const Scalar& x, const Scalar& r) - { - using std::log; - using std::norm; - using std::sqrt; - Scalar z = x / r; - if (r == Scalar(0) || norm(z) > NumTraits::epsilon()) - return RealScalar(0.5) * log((r + x) / (r - x)); - else - return z + z*z*z / RealScalar(3); - } -}; - template -struct atanh2_retval +struct log1p_retval { typedef Scalar type; }; @@ -591,6 +572,22 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() ****************************************************************************/ namespace numext { + +template +EIGEN_DEVICE_FUNC +inline T mini(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(min); + return min EIGEN_NOT_A_MACRO (x,y); +} + +template +EIGEN_DEVICE_FUNC +inline T maxi(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(max); + return max EIGEN_NOT_A_MACRO (x,y); +} template EIGEN_DEVICE_FUNC @@ -664,9 +661,9 @@ inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& template EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y) +inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y); + return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } template @@ -694,6 +691,21 @@ bool (isfinite)(const std::complex& x) return isfinite(real(x)) && isfinite(imag(x)); } +// Log base 2 for 32 bits positive integers. +// Conveniently returns 0 for x==0. 
+inline int log2(int x) +{ + eigen_assert(x>=0); + unsigned int v(x); + static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return table[(v * 0x07C4ACDDU) >> 27]; +} + } // end namespace numext namespace internal { diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 8c95ee3ca..0b3d90786 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -115,7 +115,8 @@ struct traits > MaxRowsAtCompileTime = _MaxRows, MaxColsAtCompileTime = _MaxCols, Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - CoeffReadCost = NumTraits::ReadCost, + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, Options = _Options, InnerStrideAtCompileTime = 1, OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime @@ -213,7 +214,7 @@ class Matrix // FIXME is it still needed EIGEN_DEVICE_FUNC - Matrix(internal::constructor_without_unaligned_array_assert) + explicit Matrix(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } @@ -359,15 +360,6 @@ class Matrix *this = other; } - /** \internal - * \brief Override MatrixBase::swap() since for dynamic-sized matrices - * of same type it is enough to swap the data pointers. 
- */ - template - EIGEN_DEVICE_FUNC - void swap(MatrixBase const & other) - { this->_swap(other.derived()); } - EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 3cb5e04fd..86994cb36 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -66,8 +66,7 @@ template class MatrixBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -181,26 +180,20 @@ template class MatrixBase #ifdef __CUDACC__ template EIGEN_DEVICE_FUNC - const typename LazyProductReturnType::Type + const Product operator*(const MatrixBase &other) const { return this->lazyProduct(other); } #else -#ifdef EIGEN_TEST_EVALUATORS template const Product operator*(const MatrixBase &other) const; -#else - template - const typename ProductReturnType::Type - operator*(const MatrixBase &other) const; -#endif #endif template EIGEN_DEVICE_FUNC - const typename LazyProductReturnType::Type + const Product lazyProduct(const MatrixBase &other) const; template @@ -214,7 +207,7 @@ template class MatrixBase template EIGEN_DEVICE_FUNC - const DiagonalProduct + const Product operator*(const DiagonalBase &diagonal) const; template @@ -251,19 +244,14 @@ template class MatrixBase template EIGEN_DEVICE_FUNC typename ConstDiagonalIndexReturnType::Type diagonal() const; - - // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations. - // On the other hand they confuse MSVC8... 
- #if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later - typename MatrixBase::template DiagonalIndexReturnType::Type diagonal(Index index); - typename MatrixBase::template ConstDiagonalIndexReturnType::Type diagonal(Index index) const; - #else - EIGEN_DEVICE_FUNC - typename DiagonalIndexReturnType::Type diagonal(Index index); + typedef Diagonal DiagonalDynamicIndexReturnType; + typedef typename internal::add_const >::type ConstDiagonalDynamicIndexReturnType; + EIGEN_DEVICE_FUNC - typename ConstDiagonalIndexReturnType::Type diagonal(Index index) const; - #endif + DiagonalDynamicIndexReturnType diagonal(Index index); + EIGEN_DEVICE_FUNC + ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; template struct TriangularViewReturnType { typedef TriangularView Type; }; template struct ConstTriangularViewReturnType { typedef const TriangularView Type; }; @@ -334,10 +322,12 @@ template class MatrixBase NoAlias noalias(); - inline const ForceAlignedAccess forceAlignedAccess() const; - inline ForceAlignedAccess forceAlignedAccess(); - template inline typename internal::add_const_on_value_type,Derived&>::type>::type forceAlignedAccessIf() const; - template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + // TODO forceAlignedAccess is temporarily disabled + // Need to find a nicer workaround. 
+ inline const Derived& forceAlignedAccess() const { return derived(); } + inline Derived& forceAlignedAccess() { return derived(); } + template inline const Derived& forceAlignedAccessIf() const { return derived(); } + template inline Derived& forceAlignedAccessIf() { return derived(); } Scalar trace() const; @@ -348,10 +338,10 @@ template class MatrixBase /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return ArrayWrapper(derived()); } /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { return derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { return ArrayWrapper(derived()); } /////////// LU module /////////// @@ -361,7 +351,8 @@ template class MatrixBase const PartialPivLU lu() const; EIGEN_DEVICE_FUNC - const internal::inverse_impl inverse() const; + const Inverse inverse() const; + template void computeInverseAndDetWithCheck( ResultType& inverse, @@ -394,6 +385,7 @@ template class MatrixBase /////////// SVD module /////////// JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + BDCSVD bdcSvd(unsigned int computationOptions = 0) const; /////////// Geometry module /////////// diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index a893b1761..9aeaf8d18 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -40,29 +40,29 @@ template class NestByValue typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) - inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} - inline 
Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -91,7 +91,7 @@ template class NestByValue m_expression.const_cast_derived().template writePacket(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType m_expression; diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index 0a1c32743..0ade75255 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -30,68 +30,35 @@ namespace Eigen { template class StorageBase> class NoAlias { - typedef typename ExpressionType::Scalar Scalar; public: - NoAlias(ExpressionType& expression) : m_expression(expression) {} - - /** Behaves like 
MatrixBase::lazyAssign(other) - * \sa MatrixBase::lazyAssign() */ + typedef typename ExpressionType::Scalar Scalar; + + explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) - { return internal::assign_selector::run(m_expression,other.derived()); } - - /** \sa MatrixBase::operator+= */ + { + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + return m_expression; + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { - typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested::type OtherDerivedNested; - typedef typename internal::remove_all::type _OtherDerivedNested; - internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); return m_expression; } - - /** \sa MatrixBase::operator-= */ + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { - typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested::type OtherDerivedNested; - typedef typename internal::remove_all::type _OtherDerivedNested; - internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); return m_expression; } -#ifndef EIGEN_PARSED_BY_DOXYGEN - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase& other) - { other.derived().addTo(m_expression); return m_expression; } - - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase& other) - { other.derived().subTo(m_expression); return m_expression; } - - template - 
EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct& other) - { return m_expression.derived() += CoeffBasedProduct(other.lhs(), other.rhs()); } - - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct& other) - { return m_expression.derived() -= CoeffBasedProduct(other.lhs(), other.rhs()); } - - template - ExpressionType& operator=(const ReturnByValue& func) - { return m_expression = func; } -#endif - EIGEN_DEVICE_FUNC ExpressionType& expression() const { @@ -133,7 +100,7 @@ class NoAlias template NoAlias MatrixBase::noalias() { - return derived(); + return NoAlias(derived()); } } // end namespace Eigen diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index 8aa4c8bc5..4846f2ae1 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -13,7 +13,8 @@ namespace Eigen { -template class PermutedImpl; +// TODO: this does not seems to be needed at all: +// template class PermutedImpl; /** \class PermutationBase * \ingroup Core_Module @@ -60,7 +61,6 @@ class PermutationBase : public EigenBase typedef typename Traits::IndicesType IndicesType; enum { Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, RowsAtCompileTime = Traits::RowsAtCompileTime, ColsAtCompileTime = Traits::ColsAtCompileTime, MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, @@ -73,6 +73,7 @@ class PermutationBase : public EigenBase typedef PermutationMatrix PlainPermutationType; using Base::derived; + typedef Transpose TransposeReturnType; #endif /** Copies the other permutation into *this */ @@ -118,7 +119,7 @@ class PermutationBase : public EigenBase void evalTo(MatrixBase& other) const { other.setZero(); - for (int i=0; i * * \returns a reference to *this. 
* - * \warning This is much slower than applyTranspositionOnTheRight(int,int): + * \warning This is much slower than applyTranspositionOnTheRight(Index,Index): * this has linear complexity and requires a lot of branching. * - * \sa applyTranspositionOnTheRight(int,int) + * \sa applyTranspositionOnTheRight(Index,Index) */ Derived& applyTranspositionOnTheLeft(Index i, Index j) { @@ -185,7 +186,7 @@ class PermutationBase : public EigenBase * * This is a fast operation, it only consists in swapping two indices. * - * \sa applyTranspositionOnTheLeft(int,int) + * \sa applyTranspositionOnTheLeft(Index,Index) */ Derived& applyTranspositionOnTheRight(Index i, Index j) { @@ -198,14 +199,14 @@ class PermutationBase : public EigenBase * * \note \note_try_to_help_rvo */ - inline Transpose inverse() const - { return derived(); } + inline TransposeReturnType inverse() const + { return TransposeReturnType(derived()); } /** \returns the tranpose permutation matrix. * * \note \note_try_to_help_rvo */ - inline Transpose transpose() const - { return derived(); } + inline TransposeReturnType transpose() const + { return TransposeReturnType(derived()); } /**** multiplication helpers to hopefully get RVO ****/ @@ -215,13 +216,13 @@ class PermutationBase : public EigenBase template void assignTranspose(const PermutationBase& other) { - for (int i=0; i void assignProduct(const Lhs& lhs, const Rhs& rhs) { eigen_assert(lhs.cols() == rhs.rows()); - for (int i=0; i > : traits > { + typedef PermutationStorage StorageKind; typedef Matrix<_StorageIndexType, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; typedef typename IndicesType::Index Index; typedef _StorageIndexType StorageIndexType; @@ -287,6 +289,8 @@ class PermutationMatrix : public PermutationBase Traits; public: + typedef const PermutationMatrix& Nested; + #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Traits::IndicesType IndicesType; typedef typename Traits::StorageIndexType StorageIndexType; @@ -298,8 +302,10 @@ 
class PermutationMatrix : public PermutationBase::highest()); + } /** Copy constructor. */ template @@ -370,7 +376,10 @@ class PermutationMatrix : public PermutationBase >& other) : m_indices(other.nestedPermutation().size()) { - for (int i=0; i::highest()); + StorageIndexType end = StorageIndexType(m_indices.size()); + for (StorageIndexType i=0; i PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) @@ -391,6 +400,7 @@ template,_PacketAccess> > : traits > { + typedef PermutationStorage StorageKind; typedef Map, _PacketAccess> IndicesType; typedef typename IndicesType::Index Index; typedef _StorageIndexType StorageIndexType; @@ -462,8 +472,6 @@ class Map class TranspositionsWrapper; namespace internal { template @@ -477,10 +485,9 @@ struct traits > enum { RowsAtCompileTime = _IndicesType::SizeAtCompileTime, ColsAtCompileTime = _IndicesType::SizeAtCompileTime, - MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime, - Flags = 0, - CoeffReadCost = _IndicesType::CoeffReadCost + MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + Flags = 0 }; }; } @@ -509,35 +516,39 @@ class PermutationWrapper : public PermutationBase -inline const internal::permut_matrix_product_retval -operator*(const MatrixBase& matrix, - const PermutationBase &permutation) +template +EIGEN_DEVICE_FUNC +const Product +operator*(const MatrixBase &matrix, + const PermutationBase& permutation) { - return internal::permut_matrix_product_retval - - (permutation.derived(), matrix.derived()); + return Product + (matrix.derived(), permutation.derived()); } /** \returns the matrix with the permutation applied to the rows. 
*/ -template -inline const internal::permut_matrix_product_retval - +template +EIGEN_DEVICE_FUNC +const Product operator*(const PermutationBase &permutation, - const MatrixBase& matrix) + const MatrixBase& matrix) { - return internal::permut_matrix_product_retval - - (permutation.derived(), matrix.derived()); + return Product + (permutation.derived(), matrix.derived()); } namespace internal { template struct traits > + : traits { typedef typename MatrixType::PlainObject ReturnType; }; @@ -590,7 +601,7 @@ struct permut_matrix_product_retval } else { - for(int i = 0; i < n; ++i) + for(Index i = 0; i < n; ++i) { Block (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i) @@ -617,6 +628,8 @@ struct traits > > } // end namespace internal +// TODO: the specificties should be handled by the evaluator, +// at the very least we should only specialize TransposeImpl template class Transpose > : public EigenBase > > @@ -631,26 +644,26 @@ class Transpose > typedef typename Derived::DenseMatrixType DenseMatrixType; enum { Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, RowsAtCompileTime = Traits::RowsAtCompileTime, ColsAtCompileTime = Traits::ColsAtCompileTime, MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, MaxColsAtCompileTime = Traits::MaxColsAtCompileTime }; typedef typename Traits::Scalar Scalar; + typedef typename Traits::Index Index; #endif Transpose(const PermutationType& p) : m_permutation(p) {} - inline int rows() const { return m_permutation.rows(); } - inline int cols() const { return m_permutation.cols(); } + inline Index rows() const { return m_permutation.rows(); } + inline Index cols() const { return m_permutation.cols(); } #ifndef EIGEN_PARSED_BY_DOXYGEN template void evalTo(MatrixBase& other) const { other.setZero(); - for (int i=0; i > /** \returns the matrix with the inverse permutation applied to the columns. 
*/ template friend - inline const internal::permut_matrix_product_retval + const Product operator*(const MatrixBase& matrix, const Transpose& trPerm) { - return internal::permut_matrix_product_retval(trPerm.m_permutation, matrix.derived()); + return Product(matrix.derived(), trPerm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template - inline const internal::permut_matrix_product_retval + const Product operator*(const MatrixBase& matrix) const { - return internal::permut_matrix_product_retval(m_permutation, matrix.derived()); + return Product(*this, matrix.derived()); } const PermutationType& nestedPermutation() const { return m_permutation; } @@ -690,6 +703,38 @@ const PermutationWrapper MatrixBase::asPermutation() con return derived(); } +namespace internal { + +// TODO currently a permutation matrix expression has the form PermutationMatrix or PermutationWrapper +// or their transpose; in the future shape should be defined by the expression traits +template +struct evaluator_traits > +{ + typedef typename storage_kind_to_evaluator_kind::Kind Kind; + typedef PermutationShape Shape; + static const int AssumeAliasing = 0; +}; + +template +struct evaluator_traits > +{ + typedef typename storage_kind_to_evaluator_kind::Kind Kind; + typedef PermutationShape Shape; + static const int AssumeAliasing = 0; +}; + +template +struct evaluator_traits > > +{ + typedef typename storage_kind_to_evaluator_kind::Kind Kind; + typedef PermutationShape Shape; + static const int AssumeAliasing = 0; +}; + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 69f34bd3e..06e326a05 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -128,7 +128,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type 
DenseStorage m_storage; public: - enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits::Flags & AlignedBit) != 0 }; + enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits::EvaluatorFlags & AlignedBit) != 0 }; EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) EIGEN_DEVICE_FUNC @@ -221,11 +221,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type } /** \returns a const pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE const Scalar *data() const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } /** \returns a pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE Scalar *data() + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } /** Resizes \c *this to a \a rows x \a cols matrix. @@ -457,7 +457,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type // FIXME is it still needed ? /** \internal */ EIGEN_DEVICE_FUNC - PlainObjectBase(internal::constructor_without_unaligned_array_assert) + explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) : m_storage(internal::constructor_without_unaligned_array_assert()) { // _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED @@ -639,22 +639,16 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \internal */ + // aliasing is dealt once in internall::call_assignment + // so at this stage we have to assume aliasing... and resising has to be done later. 
template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set(const DenseBase& other) { - _set_selector(other.derived(), typename internal::conditional(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type()); + internal::call_assignment(this->derived(), other.derived()); return this->derived(); } - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); } - - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); } - /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which * is the case when creating a new matrix) so one can enforce lazy evaluation. * @@ -669,7 +663,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. - return internal::assign_selector::run(this->derived(), other.derived()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + return this->derived(); } template @@ -704,9 +699,12 @@ class PlainObjectBase : public internal::dense_xpr_base::type m_storage.data()[1] = Scalar(val1); } + // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, + // then the argument is meant to be the size of the object. 
template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if::value,T>::type* = 0) + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible::value) + && ((!internal::is_same::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0) { // NOTE MSVC 2008 complains if we directly put bool(NumTraits::IsInteger) as the EIGEN_STATIC_ASSERT argument. const bool is_integer = NumTraits::IsInteger; @@ -714,6 +712,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) resize(size); } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted) template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::value,T>::type* = 0) @@ -722,6 +722,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type m_storage.data()[0] = val0; } + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type) template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Index& val0, @@ -734,18 +735,21 @@ class PlainObjectBase : public internal::dense_xpr_base::type m_storage.data()[0] = Scalar(val0); } + // Initialize a fixed size matrix from a pointer to raw data template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar* data){ this->_set_noalias(ConstMapType(data)); } + // Initialize an arbitrary matrix from a dense expression template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const DenseBase& other){ this->_set_noalias(other); } + // Initialize an arbitrary matrix from a generic Eigen expression template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const EigenBase& other){ @@ -766,23 +770,58 @@ class PlainObjectBase : public 
internal::dense_xpr_base::type { this->derived() = r; } - + + // For fixed -size arrays: + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, + typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T>::type* = 0) + { + Base::setConstant(val0); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T*>::type* = 0) + { + Base::setConstant(val0); + } + template friend struct internal::matrix_swap_impl; - /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. + public: + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal + * \brief Override DenseBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. 
*/ template EIGEN_DEVICE_FUNC - void _swap(DenseBase const & other) + void swap(DenseBase & other) { enum { SwapPointers = internal::is_same::value && Base::SizeAtCompileTime==Dynamic }; - internal::matrix_swap_impl::run(this->derived(), other.const_cast_derived()); + internal::matrix_swap_impl::run(this->derived(), other.derived()); } - - public: -#ifndef EIGEN_PARSED_BY_DOXYGEN + + /** \internal + * \brief const version forwarded to DenseBase::swap + */ + template + EIGEN_DEVICE_FUNC + void swap(DenseBase const & other) + { Base::swap(other.derived()); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void _check_template_params() { @@ -797,10 +836,9 @@ class PlainObjectBase : public internal::dense_xpr_base::type && (Options & (DontAlign|RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS) } -#endif -private: - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 1 }; +#endif }; namespace internal { diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 5d3789be7..cb79543ef 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -12,8 +12,7 @@ namespace Eigen { -template class Product; -template class ProductImpl; +template class ProductImpl; /** \class Product * \ingroup Core_Module @@ -24,53 +23,108 @@ template class ProductImpl; * \param Rhs the type of the right-hand side expression * * This class represents an expression of the product of two arbitrary matrices. + * + * The other template parameters are: + * \tparam Option can be DefaultProduct or LazyProduct * */ -// Use ProductReturnType to get correct traits, in particular vectorization flags + namespace internal { -template -struct traits > - : traits::Type> -{ - // We want A+B*C to be of type Product and not Product - // TODO: This flag should eventually go in a separate evaluator traits class + +// Determine the scalar of Product. 
This is normally the same as Lhs::Scalar times +// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. +template::Shape, + typename RhsShape = typename evaluator_traits::Shape > +struct product_result_scalar +{ + typedef typename scalar_product_traits::ReturnType Scalar; +}; + +template +struct product_result_scalar +{ + typedef typename Rhs::Scalar Scalar; +}; + +template + struct product_result_scalar +{ + typedef typename Lhs::Scalar Scalar; +}; + +template +struct traits > +{ + typedef typename remove_all::type LhsCleaned; + typedef typename remove_all::type RhsCleaned; + typedef traits LhsTraits; + typedef traits RhsTraits; + + typedef MatrixXpr XprKind; + + typedef typename product_result_scalar::Scalar Scalar; + typedef typename product_promote_storage_type::ret>::ret StorageKind; + typedef typename promote_index_type::type Index; + enum { - Flags = traits::Type>::Flags & ~(EvalBeforeNestingBit | DirectAccessBit) + RowsAtCompileTime = LhsTraits::RowsAtCompileTime, + ColsAtCompileTime = RhsTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, + + // FIXME: only needed by GeneralMatrixMatrixTriangular + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = ( (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) + || ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit)) + || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) + ? RowMajorBit : (MaxColsAtCompileTime==1 ? 
0 : NoPreferredStorageOrderBit) }; }; + } // end namespace internal -template -class Product : public ProductImpl::StorageKind, - typename internal::traits::StorageKind>::ret> +template +class Product : public ProductImpl<_Lhs,_Rhs,Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits<_Rhs>::StorageKind, + internal::product_type<_Lhs,_Rhs>::ret>::ret> { public: + typedef _Lhs Lhs; + typedef _Rhs Rhs; + typedef typename ProductImpl< - Lhs, Rhs, - typename internal::promote_storage_type::ret>::Base Base; + Lhs, Rhs, Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + internal::product_type::ret>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - typedef typename Lhs::Nested LhsNested; - typedef typename Rhs::Nested RhsNested; + typedef typename internal::nested::type LhsNested; + typedef typename internal::nested::type RhsNested; typedef typename internal::remove_all::type LhsNestedCleaned; typedef typename internal::remove_all::type RhsNestedCleaned; - Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) + EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } - const LhsNestedCleaned& lhs() const { return m_lhs; } - const RhsNestedCleaned& rhs() const { return m_rhs; } + EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } protected: @@ -78,14 +132,77 @@ class Product : public ProductImpl -class ProductImpl : public 
internal::dense_xpr_base >::type -{ - typedef Product Derived; - public: +namespace internal { + +template::ret> +class dense_product_base + : public internal::dense_xpr_base >::type +{}; - typedef typename internal::dense_xpr_base >::type Base; +/** Convertion to scalar for inner-products */ +template +class dense_product_base + : public internal::dense_xpr_base >::type +{ + typedef Product ProductXpr; + typedef typename internal::dense_xpr_base::type Base; +public: + using Base::derived; + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + + operator const Scalar() const + { + return typename internal::evaluator::type(derived()).coeff(0,0); + } +}; + +} // namespace internal + +// Generic API dispatcher +template +class ProductImpl : public internal::generic_xpr_base, MatrixXpr, StorageKind>::type +{ + public: + typedef typename internal::generic_xpr_base, MatrixXpr, StorageKind>::type Base; +}; + +template +class ProductImpl + : public internal::dense_product_base +{ + typedef Product Derived; + + public: + + typedef typename internal::dense_product_base Base; EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + protected: + enum { + IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && + (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic), + EnableCoeff = IsOneByOne || Option==LazyProduct + }; + + public: + + EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return typename internal::evaluator::type(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return typename internal::evaluator::type(derived()).coeff(i); + } + + }; 
/*************************************************************************** @@ -102,6 +219,15 @@ prod(const Lhs& lhs, const Rhs& rhs) return Product(lhs,rhs); } +/** \internal used to test the evaluator only + */ +template +const Product +lazyprod(const Lhs& lhs, const Rhs& rhs) +{ + return Product(lhs,rhs); +} + } // end namespace Eigen #endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h index 483914a9b..050343b2d 100644 --- a/Eigen/src/Core/ProductBase.h +++ b/Eigen/src/Core/ProductBase.h @@ -12,253 +12,6 @@ namespace Eigen { -/** \class ProductBase - * \ingroup Core_Module - * - */ - -namespace internal { -template -struct traits > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<_Lhs>::type Lhs; - typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; - enum { - RowsAtCompileTime = traits::RowsAtCompileTime, - ColsAtCompileTime = traits::ColsAtCompileTime, - MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = traits::MaxColsAtCompileTime, - Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) - | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, - // Note that EvalBeforeNestingBit and NestByRefBit - // are not used in practice because nested is overloaded for products - CoeffReadCost = 0 // FIXME why is it needed ? 
- }; -}; -} - -#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \ - typedef ProductBase Base; \ - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Base::LhsNested LhsNested; \ - typedef typename Base::_LhsNested _LhsNested; \ - typedef typename Base::LhsBlasTraits LhsBlasTraits; \ - typedef typename Base::ActualLhsType ActualLhsType; \ - typedef typename Base::_ActualLhsType _ActualLhsType; \ - typedef typename Base::RhsNested RhsNested; \ - typedef typename Base::_RhsNested _RhsNested; \ - typedef typename Base::RhsBlasTraits RhsBlasTraits; \ - typedef typename Base::ActualRhsType ActualRhsType; \ - typedef typename Base::_ActualRhsType _ActualRhsType; \ - using Base::m_lhs; \ - using Base::m_rhs; - -template -class ProductBase : public MatrixBase -{ - public: - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase) - - typedef typename Lhs::Nested LhsNested; - typedef typename internal::remove_all::type _LhsNested; - typedef internal::blas_traits<_LhsNested> LhsBlasTraits; - typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all::type _ActualLhsType; - typedef typename internal::traits::Scalar LhsScalar; - - typedef typename Rhs::Nested RhsNested; - typedef typename internal::remove_all::type _RhsNested; - typedef internal::blas_traits<_RhsNested> RhsBlasTraits; - typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type _ActualRhsType; - typedef typename internal::traits::Scalar RhsScalar; - - // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once - typedef CoeffBasedProduct FullyLazyCoeffBaseProductType; - - public: - - typedef typename Base::PlainObject PlainObject; - - ProductBase(const Lhs& a_lhs, const Rhs& a_rhs) - : m_lhs(a_lhs), m_rhs(a_rhs) - { - eigen_assert(a_lhs.cols() == a_rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product 
use the respective explicit functions"); - } - - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); } - - const _LhsNested& lhs() const { return m_lhs; } - const _RhsNested& rhs() const { return m_rhs; } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - operator const PlainObject& () const - { - m_result.resize(m_lhs.rows(), m_rhs.cols()); - derived().evalTo(m_result); - return m_result; - } - - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - template - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - const Diagonal diagonal(Index index) const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); } - - // restrict coeff accessors to 1x1 expressions. 
No need to care about mutators here since this isn't an Lvalue expression - typename Base::CoeffReturnType coeff(Index row, Index col) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(row,col); - } - - typename Base::CoeffReturnType coeff(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(i); - } - - const Scalar& coeffRef(Index row, Index col) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(row,col); - } - - const Scalar& coeffRef(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(i); - } - - protected: - - LhsNested m_lhs; - RhsNested m_rhs; - - mutable PlainObject m_result; -}; - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -namespace internal { -template -struct nested, N, PlainObject> -{ - typedef PlainObject const& type; -}; -} - -template -class ScaledProduct; - -// Note that these two operator* functions are not defined as member -// functions of ProductBase, because, otherwise we would have to -// define all overloads defined in MatrixBase. Furthermore, Using -// "using Base::operator*" would not work with MSVC. 
-// -// Also note that here we accept any compatible scalar types -template -const ScaledProduct -operator*(const ProductBase& prod, const typename Derived::Scalar& x) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const ProductBase& prod, const typename Derived::RealScalar& x) -{ return ScaledProduct(prod.derived(), x); } - - -template -const ScaledProduct -operator*(const typename Derived::Scalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const typename Derived::RealScalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -namespace internal { -template -struct traits > - : traits, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> > -{ - typedef typename traits::StorageKind StorageKind; -}; -} - -template -class ScaledProduct - : public ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> -{ - public: - typedef ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::PlainObject PlainObject; -// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct) - - ScaledProduct(const NestedProduct& prod, const Scalar& x) - : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {} - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); } - - const Scalar& alpha() const { return m_alpha; } - - protected: - const NestedProduct& m_prod; - 
Scalar m_alpha; -}; - /** \internal * Overloaded to perform an efficient C = (A*B).lazy() */ template diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 855914f2e..488eee00c 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -16,95 +16,347 @@ namespace Eigen { namespace internal { + +/** \internal + * Evaluator of a product expression. + * Since products require special treatments to handle all possible cases, + * we simply deffer the evaluation logic to a product_evaluator class + * which offers more partial specialization possibilities. + * + * \sa class product_evaluator + */ +template +struct evaluator > + : public product_evaluator > +{ + typedef Product XprType; + typedef product_evaluator Base; -// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or -// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic -// in ProductReturnType to decide. 
- -template -struct product_evaluator_dispatcher; - -template -struct evaluator_impl > - : product_evaluator_dispatcher, typename ProductReturnType::Type> + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +// Catch scalar * ( A * B ) and transform it to (A*scalar) * B +// TODO we should apply that rule only if that's really helpful +template +struct evaluator, const Product > > + : public evaluator,const Lhs>, Rhs, DefaultProduct> > { - typedef Product XprType; - typedef product_evaluator_dispatcher::Type> Base; - - evaluator_impl(const XprType& xpr) : Base(xpr) - { } + typedef CwiseUnaryOp, const Product > XprType; + typedef evaluator,const Lhs>, Rhs, DefaultProduct> > Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) + {} }; -template -struct product_evaluator_traits_dispatcher; -template -struct evaluator_traits > - : product_evaluator_traits_dispatcher, typename ProductReturnType::Type> -{ - static const int AssumeAliasing = 1; +template +struct evaluator, DiagIndex> > + : public evaluator, DiagIndex> > +{ + typedef Diagonal, DiagIndex> XprType; + typedef evaluator, DiagIndex> > Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(Diagonal, DiagIndex>( + Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), + xpr.index() )) + {} }; -// Case 1: Evaluate all at once -// -// We can view the GeneralProduct class as a part of the product evaluator. -// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct. -// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case. + +// Helper class to perform a matrix product with the destination at hand. 
+// Depending on the sizes of the factors, there are different evaluation strategies +// as controlled by internal::product_type. +template< typename Lhs, typename Rhs, + typename LhsShape = typename evaluator_traits::Shape, + typename RhsShape = typename evaluator_traits::Shape, + int ProductType = internal::product_type::value> +struct generic_product_impl; template -struct product_evaluator_traits_dispatcher, GeneralProduct > +struct evaluator_traits > + : evaluator_traits_base > { - static const int HasEvalTo = 0; + enum { AssumeAliasing = 1 }; }; -template -struct product_evaluator_dispatcher, GeneralProduct > - : public evaluator::PlainObject>::type +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template +struct product_evaluator, ProductTag, LhsShape, RhsShape, typename traits::Scalar, typename traits::Scalar> + : public evaluator::PlainObject>::type { - typedef Product XprType; + typedef Product XprType; typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type evaluator_base; + typedef typename evaluator::type Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit +// CoeffReadCost = 0 // FIXME why is it needed? (this was already the case before the evaluators, see traits) + }; - // TODO: Computation is too early (?) - product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result) + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) { - m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum(); + ::new (static_cast(this)) Base(m_result); + +// FIXME shall we handle nested_eval here? 
+// typedef typename internal::nested_eval::type LhsNested; +// typedef typename internal::nested_eval::type RhsNested; +// typedef typename internal::remove_all::type LhsNestedCleaned; +// typedef typename internal::remove_all::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl::evalTo(m_result, lhs, rhs); + + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } protected: PlainObject m_result; }; -// For the other three subcases, simply call the evalTo() method of GeneralProduct -// TODO: GeneralProduct should take evaluators, not expression objects. - -template -struct product_evaluator_traits_dispatcher, GeneralProduct > +// Dense = Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { - static const int HasEvalTo = 1; + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + // FIXME shall we handle nested_eval here? + generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); + } }; -template -struct product_evaluator_dispatcher, GeneralProduct > +// Dense += Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, internal::add_assign_op, Dense2Dense, Scalar> { - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type evaluator_base; - - product_evaluator_dispatcher(const XprType& xpr) : m_xpr(xpr) - { } - - template - void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { - dst.resize(m_xpr.rows(), m_xpr.cols()); - GeneralProduct(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst); + // FIXME shall we handle nested_eval here? 
+ generic_product_impl::addTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense -= Product +template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, internal::sub_assign_op, Dense2Dense, Scalar> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + // FIXME shall we handle nested_eval here? + generic_product_impl::subTo(dst, src.lhs(), src.rhs()); + } +}; + + +// Dense ?= scalar * Product +// TODO we should apply that rule if that's really helpful +// for instance, this is not good for inner products +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> +struct Assignment, + const Product >, AssignFunc, Dense2Dense, Scalar> +{ + typedef CwiseUnaryOp, + const Product > SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + { + // TODO use operator* instead of prod() once we have made enough progress + call_assignment(dst.noalias(), prod(src.functor().m_other * src.nestedExpression().lhs(), src.nestedExpression().rhs()), func); + } +}; + + +template +struct generic_product_impl +{ + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); } -protected: - const XprType& m_xpr; + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } }; + +/*********************************************************************** +* Implementation of outer dense * dense vector product +***********************************************************************/ + +// Column major result +template +EIGEN_DONT_INLINE void outer_product_selector_run(Dst& 
dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +{ + typedef typename Dst::Index Index; + typename evaluator::type rhsEval(rhs); + // FIXME make sure lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + // FIXME we should probably build an evaluator for dst + const Index cols = dst.cols(); + for (Index j=0; j +EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) { + typedef typename Dst::Index Index; + typename evaluator::type lhsEval(lhs); + // FIXME make sure rhs is sequentially stored + // FIXME not very good if lhs is real and rhs complex while alpha is real too + // FIXME we should probably build an evaluator for dst + const Index rows = dst.rows(); + for (Index i=0; i +struct generic_product_impl +{ + template struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + typedef typename Product::Scalar Scalar; + + // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose + struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct adds { + Scalar m_scale; + explicit adds(const Scalar& s) : m_scale(s) {} + template void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, set(), IsRowMajor()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, 
lhs, rhs, add(), IsRowMajor()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, sub(), IsRowMajor()); + } + + template + static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), IsRowMajor()); + } + +}; + + +// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo +template +struct generic_product_impl_base +{ + typedef typename Product::Scalar Scalar; + + template + static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + + template + static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } + + template + static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } + +}; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + enum { Side = Lhs::IsVectorAtCompileTime ? 
OnTheLeft : OnTheRight }; + typedef typename internal::conditional::type MatrixType; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::gemv_dense_sense_selector::HasUsableDirectAccess) + >::run(lhs, rhs, dst, alpha); + } +}; + +template +struct generic_product_impl +{ + typedef typename Product::Scalar Scalar; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // TODO: use the following instead of calling call_assignment, same for the other methods + // dst = lazyprod(lhs,rhs); + call_assignment(dst, lazyprod(lhs,rhs), internal::assign_op()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst += lazyprod(lhs,rhs); + call_assignment(dst, lazyprod(lhs,rhs), internal::add_assign_op()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst -= lazyprod(lhs,rhs); + call_assignment(dst, lazyprod(lhs,rhs), internal::sub_assign_op()); + } + +// template +// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) +// { dst += alpha * lazyprod(lhs,rhs); } +}; + +// This specialization enforces the use of a coefficient-based evaluation strategy +template +struct generic_product_impl + : generic_product_impl {}; + // Case 2: Evaluate coeff by coeff // // This is mostly taken from CoeffBasedProduct.h @@ -117,65 +369,116 @@ struct etor_product_coeff_impl; template struct etor_product_packet_impl; -template -struct product_evaluator_traits_dispatcher, CoeffBasedProduct > +template +struct product_evaluator, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > + : evaluator_base > { - static const int HasEvalTo = 0; -}; - -template -struct product_evaluator_dispatcher, CoeffBasedProduct > - : evaluator_impl_base > -{ - typedef Product XprType; - typedef CoeffBasedProduct CoeffBasedProductType; - - 
product_evaluator_dispatcher(const XprType& xpr) - : m_lhsImpl(xpr.lhs()), - m_rhsImpl(xpr.rhs()), - m_innerDim(xpr.lhs().cols()) - { } - + typedef Product XprType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::PacketScalar PacketScalar; typedef typename XprType::PacketReturnType PacketReturnType; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, + // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. + m_innerDim(xpr.lhs().cols()) + { } + // Everything below here is taken from CoeffBasedProduct.h + typedef typename internal::nested_eval::type LhsNested; + typedef typename internal::nested_eval::type RhsNested; + + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + typedef typename evaluator::type LhsEtorType; + typedef typename evaluator::type RhsEtorType; + enum { - RowsAtCompileTime = traits::RowsAtCompileTime, + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, + PacketSize = packet_traits::size, - InnerSize = traits::InnerSize, - CoeffReadCost = traits::CoeffReadCost, + + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, + RhsCoeffReadCost = RhsEtorType::CoeffReadCost, + CoeffReadCost = (InnerSize == Dynamic || 
LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits::AddCost==Dynamic || NumTraits::MulCost==Dynamic) ? Dynamic + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost, + Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = traits::CanVectorizeInner + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + SameType = is_same::value, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime == Dynamic + || ( (ColsAtCompileTime % packet_traits::size) == 0 + && (RhsFlags&AlignedBit) + ) + ), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime == Dynamic + || ( (RowsAtCompileTime % packet_traits::size) == 0 + && (LhsFlags&AlignedBit) + ) + ), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (RhsRowMajor && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) + | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. 
+ */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (LhsFlags & RhsFlags & AlignedBit) + && (InnerSize % packet_traits::size == 0) }; - - typedef typename evaluator::type LhsEtorType; - typedef typename evaluator::type RhsEtorType; - typedef etor_product_coeff_impl CoeffImpl; - - const CoeffReturnType coeff(Index row, Index col) const + + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const { - Scalar res; - CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); - return res; + // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector */ - const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const { - Scalar res; const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; - CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); - return res; + // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } template @@ -183,158 +486,42 @@ struct product_evaluator_dispatcher, CoeffBasedProduct PacketImpl; + Unroll ? 
InnerSize-1 : Dynamic, + LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } protected: - typename evaluator::type m_lhsImpl; - typename evaluator::type m_rhsImpl; + const LhsNested m_lhs; + const RhsNested m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; // TODO: Get rid of m_innerDim if known at compile time Index m_innerDim; }; -/*************************************************************************** -* Normal product .coeff() implementation (with meta-unrolling) -***************************************************************************/ - -/************************************** -*** Scalar path - no vectorization *** -**************************************/ - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) - { - etor_product_coeff_impl::run(row, col, lhs, rhs, innerDim, res); - res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); - } -}; - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } -}; - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res) - { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(Index i = 1; i < innerDim; ++i) - res += lhs.coeff(row, i) * rhs.coeff(i, col); - } -}; - -/******************************************* -*** Scalar path with inner vectorization *** -*******************************************/ - -template -struct 
etor_product_coeff_vectorized_unroller -{ - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres) - { - etor_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, innerDim, pres); - pres = padd(pres, pmul( lhs.template packet(row, UnrollingIndex) , rhs.template packet(UnrollingIndex, col) )); - } -}; - -template -struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres) - { - pres = pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); - } -}; - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::PacketScalar Packet; - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) - { - Packet pres; - etor_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, innerDim, pres); - etor_product_coeff_impl::run(row, col, lhs, rhs, innerDim, res); - res = predux(pres); - } -}; - -template -struct etor_product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower -// NOTE maybe they are now useless since we have a specialization for Block -template -struct etor_product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& 
lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -template -struct etor_product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); - } -}; - template -struct etor_product_coeff_vectorized_dyn_selector +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > { - typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs).sum(); - } + typedef Product XprType; + typedef Product BaseProduct; + typedef product_evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(BaseProduct(xpr.lhs(),xpr.rhs())) + {} }; -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res) - { - etor_product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, innerDim, res); - } -}; - -/******************* -*** Packet path *** -*******************/ +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ template struct etor_product_packet_impl @@ -404,6 +591,280 @@ struct etor_product_packet_impl } }; + +/*************************************************************************** +* Triangular products 
+***************************************************************************/ +template +struct triangular_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl + ::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* SelfAdjoint products +***************************************************************************/ +template +struct selfadjoint_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* Diagonal products +***************************************************************************/ + +template +struct diagonal_product_evaluator_base + : evaluator_base +{ + typedef typename MatrixType::Index Index; + typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename internal::packet_traits::type 
PacketScalar; +public: + enum { + CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, + + MatrixFlags = evaluator::Flags, + DiagFlags = evaluator::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit + //(int(MatrixFlags)&int(DiagFlags)&AlignedBit), + }; + + diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) + : m_diagImpl(diag), m_matImpl(mat) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + } + +protected: + template + EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const + { + return internal::pmul(m_matImpl.template packet(row, col), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const + { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? 
MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) + }; + return internal::pmul(m_matImpl.template packet(row, col), + m_diagImpl.template packet(id)); + } + + typename evaluator::nestedType m_diagImpl; + typename evaluator::nestedType m_matImpl; +}; + +// diagonal * dense +template +struct product_evaluator, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> + : diagonal_product_evaluator_base, OnTheLeft> +{ + typedef diagonal_product_evaluator_base, OnTheLeft> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + using Base::packet_impl; + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + typedef typename Base::PacketScalar PacketScalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { + StorageOrder = int(Rhs::Flags) & RowMajorBit ? 
RowMajor : ColMajor + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.rhs(), xpr.lhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const + { + // NVCC complains about template keyword, so we disable this function in CUDA mode + return this->template packet_impl(row,col, row, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +// dense * diagonal +template +struct product_evaluator, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> + : diagonal_product_evaluator_base, OnTheRight> +{ + typedef diagonal_product_evaluator_base, OnTheRight> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + using Base::packet_impl; + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + typedef typename Base::PacketScalar PacketScalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? 
RowMajor : ColMajor }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const + { + return this->template packet_impl(row,col, col, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +/*************************************************************************** +* Products with permutation matrices +***************************************************************************/ + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permut_matrix_product_retval pmpr(lhs, rhs); + pmpr.evalTo(dst); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permut_matrix_product_retval pmpr(rhs, lhs); + pmpr.evalTo(dst); + } +}; + +template +struct generic_product_impl, Rhs, PermutationShape, DenseShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + { + permut_matrix_product_retval pmpr(lhs.nestedPermutation(), rhs); + pmpr.evalTo(dst); + } +}; + +template +struct generic_product_impl, DenseShape, PermutationShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + { + permut_matrix_product_retval pmpr(rhs.nestedPermutation(), lhs); + pmpr.evalTo(dst); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index c626946ba..f6546917e 100644 --- a/Eigen/src/Core/Redux.h +++ 
b/Eigen/src/Core/Redux.h @@ -65,6 +65,25 @@ public: ? CompleteUnrolling : NoUnrolling }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(Derived::Flags) + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; + } +#endif }; /*************************************************************************** @@ -174,7 +193,7 @@ struct redux_impl typedef typename Derived::Scalar Scalar; typedef typename Derived::Index Index; EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); Scalar res; @@ -200,10 +219,10 @@ struct redux_impl typedef typename packet_traits::type PacketScalar; typedef typename Derived::Index Index; - static Scalar run(const Derived& mat, const Func& func) + static Scalar run(const Derived &mat, const Func& func) { const Index size = mat.size(); - eigen_assert(size && "you are using an empty matrix"); + const Index packetSize = packet_traits::size; const Index alignedStart = internal::first_aligned(mat); enum { @@ -258,7 +277,7 @@ struct redux_impl typedef typename packet_traits::type PacketScalar; typedef typename Derived::Index Index; - static Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); const Index innerSize = mat.innerSize(); @@ -300,7 +319,7 @@ struct redux_impl Size = 
Derived::SizeAtCompileTime, VectorizedSize = (Size / PacketSize) * PacketSize }; - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); if (VectorizedSize > 0) { @@ -315,6 +334,66 @@ struct redux_impl } }; +// evaluator adaptor +template +class redux_evaluator +{ +public: + typedef _XprType XprType; + EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, + // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator + Flags = evaluator::Flags & ~DirectAccessBit, + IsRowMajor = XprType::IsRowMajor, + SizeAtCompileTime = XprType::SizeAtCompileTime, + InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, + CoeffReadCost = evaluator::CoeffReadCost + }; + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index row, Index col) const + { return m_evaluator.coeff(row, col); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index index) const + { return m_evaluator.coeff(index); } + + template + PacketReturnType packet(Index row, Index col) const + { 
return m_evaluator.template packet(row, col); } + + template + PacketReturnType packet(Index index) const + { return m_evaluator.template packet(index); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + template + PacketReturnType packetByOuterInner(Index outer, Index inner) const + { return m_evaluator.template packet(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + +protected: + typename internal::evaluator::nestedType m_evaluator; + const XprType &m_xpr; +}; + } // end namespace internal /*************************************************************************** @@ -325,7 +404,7 @@ struct redux_impl /** \returns the result of a full redux operation on the whole matrix or vector using \a func * * The template parameter \a BinaryOp is the type of the functor \a func which must be - * an associative operator. Both current STL and TR1 functor styles are handled. + * an associative operator. Both current C++98 and C++11 functor styles are handled. 
* * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() */ @@ -334,9 +413,22 @@ template EIGEN_STRONG_INLINE typename internal::result_of::Scalar)>::type DenseBase::redux(const Func& func) const { - typedef typename internal::remove_all::type ThisNested; - return internal::redux_impl - ::run(derived(), func); + eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); + + // FIXME, eval_nest should be handled by redux_evaluator, however: + // - it is currently difficult to provide the right Flags since they are still handled by the expressions + // - handling it here might reduce the number of template instantiations +// typedef typename internal::nested_eval::type ThisNested; +// typedef typename internal::remove_all::type ThisNestedCleaned; +// typedef typename internal::redux_evaluator ThisEvaluator; +// +// ThisNested thisNested(derived()); +// ThisEvaluator thisEval(thisNested); + + typedef typename internal::redux_evaluator ThisEvaluator; + ThisEvaluator thisEval(derived()); + + return internal::redux_impl::run(thisEval, func); } /** \returns the minimum of all coefficients of \c *this. @@ -346,7 +438,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { - return this->redux(Eigen::internal::scalar_min_op()); + return derived().redux(Eigen::internal::scalar_min_op()); } /** \returns the maximum of all coefficients of \c *this. 
@@ -356,7 +448,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { - return this->redux(Eigen::internal::scalar_max_op()); + return derived().redux(Eigen::internal::scalar_max_op()); } /** \returns the sum of all coefficients of *this @@ -369,7 +461,7 @@ DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return this->redux(Eigen::internal::scalar_sum_op()); + return derived().redux(Eigen::internal::scalar_sum_op()); } /** \returns the mean of all coefficients of *this @@ -380,7 +472,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::mean() const { - return Scalar(this->redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); } /** \returns the product of all coefficients of *this @@ -396,7 +488,7 @@ DenseBase::prod() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(1); - return this->redux(Eigen::internal::scalar_product_op()); + return derived().redux(Eigen::internal::scalar_product_op()); } /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 92614c6e2..6e6adbd31 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -12,10 +12,6 @@ namespace Eigen { -template class RefBase; -template,OuterStride<> >::type > class Ref; - /** \class Ref * \ingroup Core_Module * @@ -131,12 +127,12 @@ public: typedef MapBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? 
m_stride.outer() : IsVectorAtCompileTime ? this->size() @@ -144,7 +140,7 @@ public: : this->rows(); } - RefBase() + EIGEN_DEVICE_FUNC RefBase() : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, @@ -158,7 +154,7 @@ protected: typedef Stride StrideBase; template - void construct(Expression& expr) + EIGEN_DEVICE_FUNC void construct(Expression& expr) { if(PlainObjectType::RowsAtCompileTime==1) { @@ -188,6 +184,8 @@ template class Ref : public RefBase > { typedef internal::traits Traits; + template + EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr); public: typedef RefBase Base; @@ -196,20 +194,21 @@ template class Ref #ifndef EIGEN_PARSED_BY_DOXYGEN template - inline Ref(PlainObjectBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr) { - Base::construct(expr); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); } template - inline Ref(const DenseBase& expr, - typename internal::enable_if::value&&bool(Traits::template match::MatchAtCompileTime)),Derived>::type* = 0, - int = Derived::ThisConstantIsPrivateInPlainObjectBase) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr) #else template inline Ref(DenseBase& expr) #endif { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); Base::construct(expr.const_cast_derived()); } @@ -228,7 +227,7 @@ template class Ref< 
EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template - inline Ref(const DenseBase& expr) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr) { // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; @@ -236,18 +235,27 @@ template class Ref< construct(expr.derived(), typename Traits::template match::type()); } + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template + EIGEN_DEVICE_FUNC inline Ref(const RefBase& other) { + construct(other.derived(), typename Traits::template match::type()); + } + protected: template - void construct(const Expression& expr,internal::true_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type) { Base::construct(expr); } template - void construct(const Expression& expr, internal::false_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - m_object.lazyAssign(expr); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); Base::construct(m_object); } diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index dde86a834..3777049ee 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -53,8 +53,9 @@ struct traits > IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0), - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? 
RowMajorBit : 0 }; }; } @@ -68,6 +69,7 @@ template class Replicate typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + typedef typename internal::remove_all::type NestedExpression; template inline explicit Replicate(const OriginalMatrixType& a_matrix) diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h index 7834f6cbc..af01a5567 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -38,9 +38,10 @@ struct traits > * So internal::nested always gives the plain return matrix type. * * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? + * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators */ template -struct nested, n, PlainObject> +struct nested_eval, n, PlainObject> { typedef typename traits::ReturnType type; }; @@ -48,7 +49,7 @@ struct nested, n, PlainObject> } // end namespace internal template class ReturnByValue - : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue >::type + : public internal::dense_xpr_base< ReturnByValue >::type, internal::no_assignment_operator { public: typedef typename internal::traits::ReturnType ReturnType; @@ -73,6 +74,7 @@ template class ReturnByValue const Unusable& coeff(Index,Index) const { return *reinterpret_cast(this); } Unusable& coeffRef(Index) { return *reinterpret_cast(this); } Unusable& coeffRef(Index,Index) { return *reinterpret_cast(this); } +#undef Unusable #endif }; @@ -84,6 +86,36 @@ Derived& DenseBase::operator=(const ReturnByValue& other) return derived(); } +namespace internal { + +// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that +// when a ReturnByValue expression is assigned, the evaluator is not constructed. 
+// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world + +template +struct evaluator > + : public evaluator::ReturnType>::type +{ + typedef ReturnByValue XprType; + typedef typename internal::traits::ReturnType PlainObject; + typedef typename evaluator::type Base; + + typedef evaluator type; + typedef evaluator nestedType; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + xpr.evalTo(m_result); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_RETURNBYVALUE_H diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index e30ae3d28..291300a4a 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -44,14 +44,7 @@ struct traits > ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - // let's enable LinearAccess only with vectorization because of the product overhead - LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) - ? 
LinearAccessBit : 0, - - Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess), - - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) }; }; @@ -74,6 +67,7 @@ template class Reverse typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + typedef typename internal::remove_all::type NestedExpression; using Base::IsRowMajor; // next line is necessary because otherwise const version of operator() @@ -95,47 +89,47 @@ template class Reverse typedef internal::reverse_packet_cond reverse_packet; public: - inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } - inline Scalar& operator()(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col) { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return coeffRef(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, ReverseCol ? m_matrix.cols() - col - 1 : col); } - inline CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const { return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, ReverseCol ? 
m_matrix.cols() - col - 1 : col); } - inline CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_matrix.coeff(m_matrix.size() - index - 1); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); } - inline Scalar& operator()(Index index) + EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index) { eigen_assert(index >= 0 && index < m_matrix.size()); return coeffRef(index); @@ -170,7 +164,7 @@ template class Reverse m_matrix.const_cast_derived().template writePacket(m_matrix.size() - index - PacketSize, internal::preverse(x)); } - const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { return m_matrix; @@ -190,7 +184,7 @@ template inline typename DenseBase::ReverseReturnType DenseBase::reverse() { - return derived(); + return ReverseReturnType(derived()); } /** This is the const version of reverse(). */ @@ -198,7 +192,7 @@ template inline const typename DenseBase::ConstReverseReturnType DenseBase::reverse() const { - return derived(); + return ConstReverseReturnType(derived()); } /** This is the "in place" version of reverse: it reverses \c *this. 
diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h index 87993bbb5..79eec1b5b 100644 --- a/Eigen/src/Core/Select.h +++ b/Eigen/src/Core/Select.h @@ -43,23 +43,21 @@ struct traits > ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, - CoeffReadCost = traits::type>::CoeffReadCost - + EIGEN_SIZE_MAX(traits::type>::CoeffReadCost, - traits::type>::CoeffReadCost) + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit }; }; } template -class Select : internal::no_assignment_operator, - public internal::dense_xpr_base< Select >::type +class Select : public internal::dense_xpr_base< Select >::type, + internal::no_assignment_operator { public: typedef typename internal::dense_xpr_base